aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-01-15 22:30:16 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-01-15 22:30:16 +0000
commit9f61947910e6ab40de38e6b4034751ef1513200f (patch)
tree3231b7529d89052b2edb92bb5ddc6a9e960e5161
parent5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (diff)
downloadsrc-9f61947910e6ab40de38e6b4034751ef1513200f.tar.gz
src-9f61947910e6ab40de38e6b4034751ef1513200f.zip
Vendor import of llvm RELEASE_351/final tag r225668 (effectively, 3.5.1 release):vendor/llvm/llvm-release_351-r225668
Notes
Notes: svn path=/vendor/llvm/dist/; revision=277220 svn path=/vendor/llvm/llvm-release_351-r225668/; revision=277221; tag=vendor/llvm/llvm-release_351-r225668
-rw-r--r--CMakeLists.txt2
-rw-r--r--autoconf/configure.ac4
-rw-r--r--cmake/modules/Makefile17
-rwxr-xr-xconfigure20
-rw-r--r--docs/ReleaseNotes.rst68
-rw-r--r--include/llvm/Analysis/AliasSetTracker.h5
-rw-r--r--include/llvm/CodeGen/CallingConvLower.h50
-rw-r--r--include/llvm/Target/TargetCallingConv.td16
-rw-r--r--lib/Analysis/AliasSetTracker.cpp42
-rw-r--r--lib/Analysis/BlockFrequencyInfoImpl.cpp22
-rw-r--r--lib/Analysis/ValueTracking.cpp38
-rw-r--r--lib/MC/MCObjectFileInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp91
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h2
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp6
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp4
-rw-r--r--lib/Target/Mips/CMakeLists.txt2
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp96
-rw-r--r--lib/Target/Mips/Mips.td10
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp7
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.h6
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td4
-rw-r--r--lib/Target/Mips/MipsABIInfo.cpp45
-rw-r--r--lib/Target/Mips/MipsABIInfo.h61
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp26
-rw-r--r--lib/Target/Mips/MipsCCState.cpp142
-rw-r--r--lib/Target/Mips/MipsCCState.h137
-rw-r--r--lib/Target/Mips/MipsCallingConv.td205
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp4
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp808
-rw-r--r--lib/Target/Mips/MipsISelLowering.h132
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td48
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td10
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp8
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp6
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp5
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp9
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.h6
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp17
-rw-r--r--lib/Target/Mips/MipsSubtarget.h30
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp11
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp25
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp17
-rw-r--r--lib/Target/PowerPC/PPC.h7
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp162
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp33
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp39
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp25
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp163
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h18
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td19
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td48
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp6
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td11
-rw-r--r--lib/Target/X86/X86FastISel.cpp3
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp27
-rw-r--r--lib/Transforms/Scalar/MergedLoadStoreMotion.cpp24
-rw-r--r--lib/Transforms/Scalar/SROA.cpp5
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp19
-rw-r--r--test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll40
-rw-r--r--test/CodeGen/ARM/ghc-tcreturn-lowered.ll21
-rw-r--r--test/CodeGen/Mips/abicalls.ll21
-rw-r--r--test/CodeGen/Mips/atomic.ll14
-rw-r--r--test/CodeGen/Mips/bswap.ll36
-rw-r--r--test/CodeGen/Mips/cconv/arguments-float.ll22
-rw-r--r--test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll22
-rw-r--r--test/CodeGen/Mips/cconv/arguments-struct.ll41
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs.ll1104
-rw-r--r--test/CodeGen/Mips/cconv/arguments.ll30
-rw-r--r--test/CodeGen/Mips/cconv/return-float.ll4
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-float.ll4
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-struct-f128.ll36
-rw-r--r--test/CodeGen/Mips/cconv/return-struct.ll232
-rw-r--r--test/CodeGen/Mips/cconv/return.ll4
-rwxr-xr-xtest/CodeGen/Mips/cmov.ll34
-rw-r--r--test/CodeGen/Mips/const-mult.ll8
-rw-r--r--test/CodeGen/Mips/countleading.ll4
-rw-r--r--test/CodeGen/Mips/divrem.ll12
-rw-r--r--test/CodeGen/Mips/ehframe-indirect.ll15
-rw-r--r--test/CodeGen/Mips/fastcc.ll86
-rw-r--r--test/CodeGen/Mips/fp64a.ll98
-rw-r--r--test/CodeGen/Mips/inlineasm-operand-code.ll27
-rw-r--r--test/CodeGen/Mips/load-store-left-right.ll4
-rw-r--r--test/CodeGen/Mips/longbranch.ll2
-rw-r--r--test/CodeGen/Mips/madd-msub.ll56
-rw-r--r--test/CodeGen/Mips/mips64-f128.ll4
-rw-r--r--test/CodeGen/Mips/mips64-sret.ll2
-rw-r--r--test/CodeGen/Mips/msa/frameindex.ll92
-rw-r--r--test/CodeGen/Mips/octeon_popcnt.ll6
-rw-r--r--test/CodeGen/Mips/select.ll20
-rw-r--r--test/CodeGen/Mips/zeroreg.ll4
-rw-r--r--test/CodeGen/PowerPC/blockaddress.ll26
-rw-r--r--test/CodeGen/PowerPC/cc.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-conversion.ll121
-rw-r--r--test/CodeGen/PowerPC/fast-isel-ret.ll66
-rw-r--r--test/CodeGen/PowerPC/ia-mem-r0.ll94
-rw-r--r--test/CodeGen/PowerPC/ia-neg-const.ll25
-rw-r--r--test/CodeGen/PowerPC/stack-realign.ll8
-rw-r--r--test/CodeGen/PowerPC/subreg-postra-2.ll175
-rw-r--r--test/CodeGen/PowerPC/subreg-postra.ll168
-rw-r--r--test/CodeGen/PowerPC/tls-pic.ll16
-rw-r--r--test/CodeGen/PowerPC/tls-store2.ll33
-rw-r--r--test/MC/Disassembler/Mips/mips2.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips64.txt6
-rw-r--r--test/MC/PowerPC/ppc64-localentry.s19
-rw-r--r--test/Transforms/GCOVProfiling/linezero.ll2
-rw-r--r--test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll2
-rw-r--r--test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll4
-rw-r--r--test/Transforms/IndVarSimplify/lftr-extend-const.ll4
-rw-r--r--test/Transforms/IndVarSimplify/lftr-reuse.ll24
-rw-r--r--test/Transforms/IndVarSimplify/pr20680.ll219
-rw-r--r--test/Transforms/LICM/PR21582.ll40
-rw-r--r--test/Transforms/LICM/speculate.ll10
-rw-r--r--test/Transforms/LoopVectorize/incorrect-dom-info.ll142
-rw-r--r--test/Transforms/LoopVectorize/loop-form.ll31
-rw-r--r--test/Transforms/LoopVectorize/runtime-check-address-space.ll154
-rw-r--r--test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll120
-rw-r--r--test/Transforms/LoopVectorize/unsized-pointee-crash.ll24
-rw-r--r--test/Transforms/LoopVectorize/vect.stats.ll58
-rw-r--r--test/Transforms/SROA/phi-and-select.ll34
-rw-r--r--unittests/Analysis/LazyCallGraphTest.cpp32
-rw-r--r--utils/TableGen/CallingConvEmitter.cpp46
-rw-r--r--utils/lit/lit/Test.py15
-rwxr-xr-xutils/lit/lit/main.py47
125 files changed, 5248 insertions, 1525 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b9fca2a386d1..728a4da69425 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,7 +27,7 @@ set(CMAKE_MODULE_PATH
set(LLVM_VERSION_MAJOR 3)
set(LLVM_VERSION_MINOR 5)
-set(LLVM_VERSION_PATCH 0)
+set(LLVM_VERSION_PATCH 1)
if (NOT PACKAGE_VERSION)
set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}svn")
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 34fadbe6fe3c..d3deac199a2e 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -32,11 +32,11 @@ dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.
-AC_INIT([LLVM],[3.5.0],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.5.1],[http://llvm.org/bugs/])
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=5
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
LLVM_VERSION_SUFFIX=
AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
diff --git a/cmake/modules/Makefile b/cmake/modules/Makefile
index ba45f1737b17..dd31aa7926cf 100644
--- a/cmake/modules/Makefile
+++ b/cmake/modules/Makefile
@@ -33,6 +33,19 @@ else
LLVM_ENABLE_RTTI := 0
endif
+# Don't try to run llvm-config during clean because it won't be available
+ifneq ($(MAKECMDGOALS),clean)
+LLVM_LIBS_TO_EXPORT := $(subst -l,,$(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS) || echo Error))
+
+ifeq ($(LLVM_LIBS_TO_EXPORT),Error)
+$(error llvm-config --libs failed)
+endif
+
+ifndef LLVM_LIBS_TO_EXPORT
+$(error LLVM_LIBS_TO_EXPORT cannot be empty)
+endif
+endif
+
OBJMODS := LLVMConfig.cmake LLVMConfigVersion.cmake LLVMExports.cmake
$(PROJ_OBJ_DIR)/LLVMConfig.cmake: LLVMConfig.cmake.in $(LLVMBuildCMakeFrag)
@@ -45,7 +58,7 @@ $(PROJ_OBJ_DIR)/LLVMConfig.cmake: LLVMConfig.cmake.in $(LLVMBuildCMakeFrag)
-e 's/@LLVM_VERSION_PATCH@/'"$(LLVM_VERSION_PATCH)"'/' \
-e 's/@PACKAGE_VERSION@/'"$(LLVMVersion)"'/' \
-e 's/@LLVM_COMMON_DEPENDS@//' \
- -e 's/@LLVM_AVAILABLE_LIBS@/'"$(subst -l,,$(LLVMConfigLibs))"'/' \
+ -e 's/@LLVM_AVAILABLE_LIBS@/'"$(LLVM_LIBS_TO_EXPORT)"'/' \
-e 's/@LLVM_ALL_TARGETS@/'"$(ALL_TARGETS)"'/' \
-e 's/@LLVM_TARGETS_TO_BUILD@/'"$(TARGETS_TO_BUILD)"'/' \
-e 's/@LLVM_TARGETS_WITH_JIT@/'"$(TARGETS_WITH_JIT)"'/' \
@@ -83,7 +96,7 @@ $(PROJ_OBJ_DIR)/LLVMExports.cmake: $(LLVMBuildCMakeExportsFrag)
$(Echo) 'Generating LLVM CMake target exports file'
$(Verb) ( \
echo '# LLVM CMake target exports. Do not include directly.' && \
- for lib in $(subst -l,,$(LLVMConfigLibs)); do \
+ for lib in $(LLVM_LIBS_TO_EXPORT); do \
echo 'add_library('"$$lib"' STATIC IMPORTED)' && \
echo 'set_property(TARGET '"$$lib"' PROPERTY IMPORTED_LOCATION "'"$(PROJ_libdir)/lib$$lib.a"'")' ; \
done && \
diff --git a/configure b/configure
index c11c3958fa77..de6fe38af963 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.5.0.
+# Generated by GNU Autoconf 2.60 for LLVM 3.5.1.
#
# Report bugs to <http://llvm.org/bugs/>.
#
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='LLVM'
PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.5.0'
-PACKAGE_STRING='LLVM 3.5.0'
+PACKAGE_VERSION='3.5.1'
+PACKAGE_STRING='LLVM 3.5.1'
PACKAGE_BUGREPORT='http://llvm.org/bugs/'
ac_unique_file="lib/IR/Module.cpp"
@@ -1316,7 +1316,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures LLVM 3.5.0 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.5.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1382,7 +1382,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of LLVM 3.5.0:";;
+ short | recursive ) echo "Configuration of LLVM 3.5.1:";;
esac
cat <<\_ACEOF
@@ -1553,7 +1553,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-LLVM configure 3.5.0
+LLVM configure 3.5.1
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1569,7 +1569,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by LLVM $as_me 3.5.0, which was
+It was created by LLVM $as_me 3.5.1, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -1925,7 +1925,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=5
-LLVM_VERSION_PATCH=0
+LLVM_VERSION_PATCH=1
LLVM_VERSION_SUFFIX=
@@ -19245,7 +19245,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by LLVM $as_me 3.5.0, which was
+This file was extended by LLVM $as_me 3.5.1, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -19298,7 +19298,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-LLVM config.status 3.5.0
+LLVM config.status 3.5.1
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 1b9537a6d3a0..87f96a64230f 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -24,6 +24,43 @@ them.
Non-comprehensive list of changes in this release
=================================================
+Changes to the MIPS Target
+--------------------------
+
+* A large number of bugs have been fixed for big-endian Mips targets using the
+ N32 and N64 ABI's. Please note that some of these bugs will still affect
+ LLVM-IR generated by LLVM 3.5 since correct code generation depends on
+ appropriate usage of the ``inreg``, ``signext``, and ``zeroext`` attributes
+ on all function arguments and returns.
+
+* The registers used to return a structure containing a single 128-bit floating
+ point member on the N32/N64 ABI's have been changed from those specified by
+ the ABI documentation to match those used by GCC. The documentation specifies
+ that ``$f0`` and ``$f2`` should be used but GCC has used ``$f0`` and ``$f1``
+ for many years.
+
+* Returning a zero-byte struct no longer causes incorrect code generation when
+ using the O32 ABI.
+
+* Passing structures of less than 32-bits using the O32 ABI on a big-endian
+ target has been fixed.
+
+* The exception personality has been changed for 64-bit Mips targets to
+ eliminate warnings about relocations in a read-only section.
+
+* Incorrect usage of odd-numbered single-precision floating point registers
+ has been fixed when the fastcc calling convention is used with 64-bit FPU's
+ and -mno-odd-spreg.
+
+* For inline assembly, the 'z' print-modifier print modifier can now be used on
+ non-immediate values.
+
+* Attempting to disassemble l[wd]c[23], s[wd]c[23], cache, and pref no longer
+ triggers an assertion.
+
+Non-comprehensive list of changes in 3.5
+========================================
+
* All backends have been changed to use the MC asm printer and support for the
non MC one has been removed.
@@ -217,6 +254,37 @@ We had also decided that the name of the combined backend should be AArch64,
following ARM's official documentation. So, at the end of May the old
AArch64 directory was removed, and ARM64 renamed into its place.
+Changes to the PowerPC Target
+-----------------------------
+
+The PowerPC 64-bit Little Endian subtarget (powerpc64le-unknown-linux-gnu) is
+now fully supported. This includes support for the Altivec instruction set.
+
+The Power Architecture 64-Bit ELFv2 ABI Specification is now supported, and
+is the default ABI for Little Endian. The ELFv1 ABI remains the default ABI
+for Big Endian. Currently, it is not possible to override these defaults.
+That capability will be available (albeit not recommended) in a future release.
+
+Links to the ELFv2 ABI specification and to the Power ISA Version 2.07
+specification may be found `here <https://www-03.ibm.com/technologyconnect/tgcm/TGCMServlet.wss?alias=OpenPOWER&linkid=1n0000>`_ (free registration required).
+Efforts are underway to move this to a location that doesn't require
+registration, but the planned site isn't ready yet.
+
+Experimental support for the VSX instruction set introduced with ISA 2.06
+is now available using the ``-mvsx`` switch. Work remains on this, so it
+is not recommended for production use. VSX is disabled for Little Endian
+regardless of this switch setting.
+
+Load/store cost estimates have been improved.
+
+Constant hoisting has been enabled.
+
+Global named register support has been enabled.
+
+Initial support for PIC code has been added for the 32-bit ELF subtarget.
+Further support will be available in a future release.
+
+
Changes to CMake build system
-----------------------------
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 6117d91ec651..e32b6d628b7f 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -253,13 +253,16 @@ private:
const MDNode *TBAAInfo,
bool KnownMustAlias = false);
void addUnknownInst(Instruction *I, AliasAnalysis &AA);
- void removeUnknownInst(Instruction *I) {
+ void removeUnknownInst(AliasSetTracker &AST, Instruction *I) {
+ bool WasEmpty = UnknownInsts.empty();
for (size_t i = 0, e = UnknownInsts.size(); i != e; ++i)
if (UnknownInsts[i] == I) {
UnknownInsts[i] = UnknownInsts.back();
UnknownInsts.pop_back();
--i; --e; // Revisit the moved entry.
}
+ if (!WasEmpty && UnknownInsts.empty())
+ dropRef(AST);
}
void setVolatile() { Volatile = true; }
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 04af4bd4d59d..abe00a167fd6 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -31,18 +31,25 @@ class TargetRegisterInfo;
class CCValAssign {
public:
enum LocInfo {
- Full, // The value fills the full location.
- SExt, // The value is sign extended in the location.
- ZExt, // The value is zero extended in the location.
- AExt, // The value is extended with undefined upper bits.
- BCvt, // The value is bit-converted in the location.
- VExt, // The value is vector-widened in the location.
- // FIXME: Not implemented yet. Code that uses AExt to mean
- // vector-widen should be fixed to use VExt instead.
- FPExt, // The floating-point value is fp-extended in the location.
- Indirect // The location contains pointer to the value.
+ Full, // The value fills the full location.
+ SExt, // The value is sign extended in the location.
+ ZExt, // The value is zero extended in the location.
+ AExt, // The value is extended with undefined upper bits.
+ BCvt, // The value is bit-converted in the location.
+ VExt, // The value is vector-widened in the location.
+ // FIXME: Not implemented yet. Code that uses AExt to mean
+ // vector-widen should be fixed to use VExt instead.
+ FPExt, // The floating-point value is fp-extended in the location.
+ Indirect, // The location contains pointer to the value.
+ SExtUpper, // The value is in the upper bits of the location and should be
+ // sign extended when retrieved.
+ ZExtUpper, // The value is in the upper bits of the location and should be
+ // zero extended when retrieved.
+ AExtUpper // The value is in the upper bits of the location and should be
+ // extended with undefined upper bits when retrieved.
// TODO: a subset of the value is in the location.
};
+
private:
/// ValNo - This is the value number begin assigned (e.g. an argument number).
unsigned ValNo;
@@ -146,6 +153,9 @@ public:
return (HTP == AExt || HTP == SExt || HTP == ZExt);
}
+ bool isUpperBitsInLoc() const {
+ return HTP == AExtUpper || HTP == SExtUpper || HTP == ZExtUpper;
+ }
};
/// CCAssignFn - This function assigns a location for Val, updating State to
@@ -208,10 +218,10 @@ private:
// while "%t" goes to the stack: it wouldn't be described in ByValRegs.
//
// Supposed use-case for this collection:
- // 1. Initially ByValRegs is empty, InRegsParamsProceed is 0.
+ // 1. Initially ByValRegs is empty, InRegsParamsProcessed is 0.
// 2. HandleByVal fillups ByValRegs.
// 3. Argument analysis (LowerFormatArguments, for example). After
- // some byval argument was analyzed, InRegsParamsProceed is increased.
+ // some byval argument was analyzed, InRegsParamsProcessed is increased.
struct ByValInfo {
ByValInfo(unsigned B, unsigned E, bool IsWaste = false) :
Begin(B), End(E), Waste(IsWaste) {}
@@ -229,9 +239,9 @@ private:
};
SmallVector<ByValInfo, 4 > ByValRegs;
- // InRegsParamsProceed - shows how many instances of ByValRegs was proceed
+ // InRegsParamsProcessed - shows how many instances of ByValRegs was proceed
// during argument analysis.
- unsigned InRegsParamsProceed;
+ unsigned InRegsParamsProcessed;
protected:
ParmContext CallOrPrologue;
@@ -412,7 +422,7 @@ public:
unsigned getInRegsParamsCount() const { return ByValRegs.size(); }
// Returns count of byval in-regs arguments proceed.
- unsigned getInRegsParamsProceed() const { return InRegsParamsProceed; }
+ unsigned getInRegsParamsProcessed() const { return InRegsParamsProcessed; }
// Get information about N-th byval parameter that is stored in registers.
// Here "ByValParamIndex" is N.
@@ -436,20 +446,20 @@ public:
// Returns false, if end is reached.
bool nextInRegsParam() {
unsigned e = ByValRegs.size();
- if (InRegsParamsProceed < e)
- ++InRegsParamsProceed;
- return InRegsParamsProceed < e;
+ if (InRegsParamsProcessed < e)
+ ++InRegsParamsProcessed;
+ return InRegsParamsProcessed < e;
}
// Clear byval registers tracking info.
void clearByValRegsInfo() {
- InRegsParamsProceed = 0;
+ InRegsParamsProcessed = 0;
ByValRegs.clear();
}
// Rewind byval registers tracking info.
void rewindByValRegsInfo() {
- InRegsParamsProceed = 0;
+ InRegsParamsProcessed = 0;
}
ParmContext getCallOrPrologue() const { return CallOrPrologue; }
diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
index 8f31e0898399..2e766c448b34 100644
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@@ -67,6 +67,9 @@ class CCIfSplit<CCAction A> : CCIf<"ArgFlags.isSplit()", A> {}
/// the specified action.
class CCIfSRet<CCAction A> : CCIf<"ArgFlags.isSRet()", A> {}
+/// CCIfVarArg - If the current function is vararg - apply the action
+class CCIfVarArg<CCAction A> : CCIf<"State.isVarArg()", A> {}
+
/// CCIfNotVarArg - If the current function is not vararg - apply the action
class CCIfNotVarArg<CCAction A> : CCIf<"!State.isVarArg()", A> {}
@@ -119,6 +122,12 @@ class CCPromoteToType<ValueType destTy> : CCAction {
ValueType DestTy = destTy;
}
+/// CCPromoteToUpperBitsInType - If applied, this promotes the specified current
+/// value to the specified type and shifts the value into the upper bits.
+class CCPromoteToUpperBitsInType<ValueType destTy> : CCAction {
+ ValueType DestTy = destTy;
+}
+
/// CCBitConvertToType - If applied, this bitconverts the specified current
/// value to the specified type.
class CCBitConvertToType<ValueType destTy> : CCAction {
@@ -141,6 +150,13 @@ class CCDelegateTo<CallingConv cc> : CCAction {
/// that the target supports.
class CallingConv<list<CCAction> actions> {
list<CCAction> Actions = actions;
+ bit Custom = 0;
+}
+
+/// CustomCallingConv - An instance of this is used to declare calling
+/// conventions that are implemented using a custom function of the same name.
+class CustomCallingConv : CallingConv<[]> {
+ let Custom = 1;
}
/// CalleeSavedRegs - A list of callee saved registers for a given calling
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index a45fe2389ee7..75b6be06aa8e 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -55,10 +55,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
AliasTy = MayAlias;
}
+ bool ASHadUnknownInsts = !AS.UnknownInsts.empty();
if (UnknownInsts.empty()) { // Merge call sites...
- if (!AS.UnknownInsts.empty())
+ if (ASHadUnknownInsts) {
std::swap(UnknownInsts, AS.UnknownInsts);
- } else if (!AS.UnknownInsts.empty()) {
+ addRef();
+ }
+ } else if (ASHadUnknownInsts) {
UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
AS.UnknownInsts.clear();
}
@@ -76,6 +79,8 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
AS.PtrListEnd = &AS.PtrList;
assert(*AS.PtrListEnd == nullptr && "End of list is not null?");
}
+ if (ASHadUnknownInsts)
+ AS.dropRef(AST);
}
void AliasSetTracker::removeAliasSet(AliasSet *AS) {
@@ -123,6 +128,8 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
}
void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
+ if (UnknownInsts.empty())
+ addRef();
UnknownInsts.push_back(I);
if (!I->mayWriteToMemory()) {
@@ -218,13 +225,14 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
uint64_t Size,
const MDNode *TBAAInfo) {
AliasSet *FoundSet = nullptr;
- for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
if (!FoundSet) { // If this is the first alias set ptr can go into.
- FoundSet = I; // Remember it.
+ FoundSet = Cur; // Remember it.
} else { // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
}
@@ -246,14 +254,14 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = nullptr;
- for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
continue;
-
if (!FoundSet) // If this is the first alias set ptr can go into.
- FoundSet = I; // Remember it.
- else if (!I->Forward) // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ FoundSet = Cur; // Remember it.
+ else if (!Cur->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
return FoundSet;
}
@@ -393,6 +401,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
/// tracker.
void AliasSetTracker::remove(AliasSet &AS) {
// Drop all call sites.
+ if (!AS.UnknownInsts.empty())
+ AS.dropRef(*this);
AS.UnknownInsts.clear();
// Clear the alias set.
@@ -489,10 +499,10 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
if (Inst->mayReadOrWriteMemory()) {
// Scan all the alias sets to see if this call site is contained.
- for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->Forward) continue;
-
- I->removeUnknownInst(Inst);
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (!Cur->Forward)
+ Cur->removeUnknownInst(*this, Inst);
}
}
}
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 3203c371648d..723332d480f4 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -14,7 +14,7 @@
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Support/raw_ostream.h"
-#include <deque>
+#include <numeric>
using namespace llvm;
using namespace llvm::bfi_detail;
@@ -123,8 +123,12 @@ static void combineWeight(Weight &W, const Weight &OtherW) {
}
assert(W.Type == OtherW.Type);
assert(W.TargetNode == OtherW.TargetNode);
- assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
- W.Amount += OtherW.Amount;
+ assert(OtherW.Amount && "Expected non-zero weight");
+ if (W.Amount > W.Amount + OtherW.Amount)
+ // Saturate on overflow.
+ W.Amount = UINT64_MAX;
+ else
+ W.Amount += OtherW.Amount;
}
static void combineWeightsBySorting(WeightList &Weights) {
// Sort so edges to the same node are adjacent.
@@ -207,11 +211,19 @@ void Distribution::normalize() {
Shift = 33 - countLeadingZeros(Total);
// Early exit if nothing needs to be scaled.
- if (!Shift)
+ if (!Shift) {
+ // If we didn't overflow then combineWeights() shouldn't have changed the
+ // sum of the weights, but let's double-check.
+ assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
+ [](uint64_t Sum, const Weight &W) {
+ return Sum + W.Amount;
+ }) &&
+ "Expected total to be correct");
return;
+ }
// Recompute the total through accumulation (rather than shifting it) so that
- // it's accurate after shifting.
+ // it's accurate after shifting and any changes combineWeights() made above.
Total = 0;
// Sum the weights to each node and shift right if necessary.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index e6d09f4e31f6..b6a3c36b91fe 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1987,23 +1987,31 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
default:
return true;
case Instruction::UDiv:
- case Instruction::URem:
- // x / y is undefined if y == 0, but calculations like x / 3 are safe.
- return isKnownNonZero(Inst->getOperand(1), TD);
+ case Instruction::URem: {
+ // x / y is undefined if y == 0.
+ const APInt *V;
+ if (match(Inst->getOperand(1), m_APInt(V)))
+ return *V != 0;
+ return false;
+ }
case Instruction::SDiv:
case Instruction::SRem: {
- Value *Op = Inst->getOperand(1);
- // x / y is undefined if y == 0
- if (!isKnownNonZero(Op, TD))
- return false;
- // x / y might be undefined if y == -1
- unsigned BitWidth = getBitWidth(Op->getType(), TD);
- if (BitWidth == 0)
- return false;
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Op, KnownZero, KnownOne, TD);
- return !!KnownZero;
+ // x / y is undefined if y == 0 or x == INT_MIN and y == -1
+ const APInt *X, *Y;
+ if (match(Inst->getOperand(1), m_APInt(Y))) {
+ if (*Y != 0) {
+ if (*Y == -1) {
+ // The numerator can't be MinSignedValue if the denominator is -1.
+ if (match(Inst->getOperand(0), m_APInt(X)))
+ return !Y->isMinSignedValue();
+ // The numerator *might* be MinSignedValue.
+ return false;
+ }
+ // The denominator is not 0 or -1, it's safe to proceed.
+ return true;
+ }
+ }
+ return false;
}
case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(Inst);
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index d490ef30b692..aeb92991b3d5 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -340,6 +340,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
break;
case Triple::mips:
case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
// MIPS uses indirect pointer to refer personality functions, so that the
// eh_frame section can be read-only. DW.ref.personality will be generated
// for relocation.
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a67b3600c4fe..3624377c9402 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -566,11 +566,59 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
+// Resolve TCReturn pseudo-instruction
+void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ const ARMBaseInstrInfo &TII =
+ *MF.getTarget().getSubtarget<ARMSubtarget>().getInstrInfo();
+
+ if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
+ return;
+
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode = STI.isThumb() ?
+ (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+ ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+
+ // Add the default predicate in Thumb mode.
+ if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = std::prev(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+}
+
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -588,8 +636,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
+ fixTCReturn(MF, MBB);
return;
+ }
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
@@ -661,42 +711,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
-
- // Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- unsigned TCOpcode = STI.isThumb() ?
- (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
- ARM::TAILJMPd;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
- if (JumpTarget.isGlobal())
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- else {
- assert(JumpTarget.isSymbol());
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
-
- // Add the default predicate in Thumb mode.
- if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
- } else if (RetOpcode == ARM::TCRETURNri) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = std::prev(MBBI);
- for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- MBBI = NewMI;
- }
+ fixTCReturn(MF, MBB);
if (ArgRegsSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 709afbcdc681..74629c32b398 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -31,6 +31,8 @@ public:
void emitPrologue(MachineFunction &MF) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index a76531a3869a..f5baf2e4859c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1521,7 +1521,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// True if this byval aggregate will be split between registers
// and memory.
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
- unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
if (CurByValIdx < ByValArgsCount) {
@@ -2962,7 +2962,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (Flags.isByVal()) {
unsigned ExtraArgRegsSize;
unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
+ computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
Flags.getByValSize(),
ExtraArgRegsSize, ExtraArgRegsSaveSize);
@@ -3086,7 +3086,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
int FrameIndex = StoreByValRegs(
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 53b30f9210b0..2d65b08846c7 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -200,14 +200,14 @@ class MipsAsmParser : public MCTargetAsmParser {
// Example: INSERT.B $w0[n], $1 => 16 > n >= 0
bool validateMSAIndex(int Val, int RegKind);
- void setFeatureBits(unsigned Feature, StringRef FeatureString) {
+ void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (!(STI.getFeatureBits() & Feature)) {
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
- void clearFeatureBits(unsigned Feature, StringRef FeatureString) {
+ void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (STI.getFeatureBits() & Feature) {
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index bf67d71b0a09..04f05236ccda 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -22,8 +22,10 @@ add_llvm_target(MipsCodeGen
Mips16ISelDAGToDAG.cpp
Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
+ MipsABIInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
+ MipsCCState.cpp
MipsCodeEmitter.cpp
MipsConstantIslandPass.cpp
MipsDelaySlotFiller.cpp
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index f35a8deefcff..53fb7a10ad0d 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -250,6 +250,11 @@ static DecodeStatus DecodeMem(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCacheOp(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
@@ -267,6 +272,14 @@ static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -451,7 +464,7 @@ static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
if (Rs >= Rt) {
@@ -490,7 +503,7 @@ static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
if (Rs >= Rt) {
@@ -530,7 +543,7 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
if (Rt == 0)
@@ -575,7 +588,7 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
if (Rt == 0)
return MCDisassembler::Fail;
@@ -617,7 +630,7 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
bool HasRt = false;
@@ -666,7 +679,7 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
if (Rt == 0)
@@ -964,6 +977,23 @@ static DecodeStatus DecodeMem(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeCacheOp(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Hint = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::CreateImm(Hint));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
@@ -995,15 +1025,15 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
break;
case Mips::LD_H:
case Mips::ST_H:
- Inst.addOperand(MCOperand::CreateImm(Offset << 1));
+ Inst.addOperand(MCOperand::CreateImm(Offset * 2));
break;
case Mips::LD_W:
case Mips::ST_W:
- Inst.addOperand(MCOperand::CreateImm(Offset << 2));
+ Inst.addOperand(MCOperand::CreateImm(Offset * 4));
break;
case Mips::LD_D:
case Mips::ST_D:
- Inst.addOperand(MCOperand::CreateImm(Offset << 3));
+ Inst.addOperand(MCOperand::CreateImm(Offset * 8));
break;
}
@@ -1067,6 +1097,42 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFMem2(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMem3(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::COP3RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1225,7 +1291,7 @@ static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = (SignExtend32<16>(Offset) << 2) + 4;
+ int32_t BranchOffset = (SignExtend32<16>(Offset) * 4) + 4;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -1244,7 +1310,7 @@ static DecodeStatus DecodeBranchTarget21(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = SignExtend32<21>(Offset) << 2;
+ int32_t BranchOffset = SignExtend32<21>(Offset) * 4;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
@@ -1254,7 +1320,7 @@ static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = SignExtend32<26>(Offset) << 2;
+ int32_t BranchOffset = SignExtend32<26>(Offset) * 4;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
@@ -1264,7 +1330,7 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = SignExtend32<16>(Offset) << 1;
+ int32_t BranchOffset = SignExtend32<16>(Offset) * 2;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -1317,12 +1383,12 @@ static DecodeStatus DecodeExtSize(MCInst &Inst,
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) << 2));
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) * 4));
return MCDisassembler::Success;
}
static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) << 3));
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) * 8));
return MCDisassembler::Success;
}
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index dd3bc9b08fc8..3e1d047091a9 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -57,6 +57,8 @@ def MipsInstrInfo : InstrInfo;
// Mips Subtarget features //
//===----------------------------------------------------------------------===//
+def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true",
+ "Disable SVR4-style position-independent code.">;
def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
"General Purpose Registers are 64-bit wide.">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
@@ -67,13 +69,13 @@ def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
"IEEE 754-2008 NaN encoding.">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
-def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
+def FeatureO32 : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()",
"Enable o32 ABI">;
-def FeatureN32 : SubtargetFeature<"n32", "MipsABI", "N32",
+def FeatureN32 : SubtargetFeature<"n32", "ABI", "MipsABIInfo::N32()",
"Enable n32 ABI">;
-def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
+def FeatureN64 : SubtargetFeature<"n64", "ABI", "MipsABIInfo::N64()",
"Enable n64 ABI">;
-def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
+def FeatureEABI : SubtargetFeature<"eabi", "ABI", "MipsABIInfo::EABI()",
"Enable eabi ABI">;
def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 587925df946b..9576fd48a215 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -241,10 +241,9 @@ Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
}
-bool Mips16TargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
+bool Mips16TargetLowering::isEligibleForTailCallOptimization(
+ const CCState &CCInfo, unsigned NextStackOffset,
+ const MipsFunctionInfo &FI) const {
// No tail call optimization for mips16.
return false;
}
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index e7e4d7f651d4..1649fe68b770 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -30,9 +30,9 @@ namespace llvm {
MachineBasicBlock *MBB) const override;
private:
- bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const override;
+ bool isEligibleForTailCallOptimization(
+ const CCState &CCInfo, unsigned NextStackOffset,
+ const MipsFunctionInfo &FI) const override;
void setMips16HardFloatLibCalls();
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index f0b6814e37cc..f2072a6c06e3 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -419,6 +419,10 @@ defm : SetgePats<GPR64, SLT64, SLTu64>;
defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;
// truncate
+def : MipsPat<(trunc (assertsext GPR64:$src)),
+ (EXTRACT_SUBREG GPR64:$src, sub_32)>;
+def : MipsPat<(trunc (assertzext GPR64:$src)),
+ (EXTRACT_SUBREG GPR64:$src, sub_32)>;
def : MipsPat<(i32 (trunc GPR64:$src)),
(SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;
diff --git a/lib/Target/Mips/MipsABIInfo.cpp b/lib/Target/Mips/MipsABIInfo.cpp
new file mode 100644
index 000000000000..f885369845e7
--- /dev/null
+++ b/lib/Target/Mips/MipsABIInfo.cpp
@@ -0,0 +1,45 @@
+//===---- MipsABIInfo.cpp - Information about MIPS ABI's ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsABIInfo.h"
+#include "MipsRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3};
+
+static const MCPhysReg Mips64IntRegs[8] = {
+ Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
+ Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
+}
+
+const ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
+ if (IsO32())
+ return makeArrayRef(O32IntRegs);
+ if (IsN32() || IsN64())
+ return makeArrayRef(Mips64IntRegs);
+ llvm_unreachable("Unhandled ABI");
+}
+
+const ArrayRef<MCPhysReg> MipsABIInfo::GetVarArgRegs() const {
+ if (IsO32())
+ return makeArrayRef(O32IntRegs);
+ if (IsN32() || IsN64())
+ return makeArrayRef(Mips64IntRegs);
+ llvm_unreachable("Unhandled ABI");
+}
+
+unsigned MipsABIInfo::GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const {
+ if (IsO32())
+ return CC != CallingConv::Fast ? 16 : 0;
+ if (IsN32() || IsN64() || IsEABI())
+ return 0;
+ llvm_unreachable("Unhandled ABI");
+}
diff --git a/lib/Target/Mips/MipsABIInfo.h b/lib/Target/Mips/MipsABIInfo.h
new file mode 100644
index 000000000000..bea585e41bd4
--- /dev/null
+++ b/lib/Target/Mips/MipsABIInfo.h
@@ -0,0 +1,61 @@
+//===---- MipsABIInfo.h - Information about MIPS ABI's --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSABIINFO_H
+#define MIPSABIINFO_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+class MipsABIInfo {
+public:
+ enum class ABI { Unknown, O32, N32, N64, EABI };
+
+protected:
+ ABI ThisABI;
+
+public:
+ MipsABIInfo(ABI ThisABI) : ThisABI(ThisABI) {}
+
+ static MipsABIInfo Unknown() { return MipsABIInfo(ABI::Unknown); }
+ static MipsABIInfo O32() { return MipsABIInfo(ABI::O32); }
+ static MipsABIInfo N32() { return MipsABIInfo(ABI::N32); }
+ static MipsABIInfo N64() { return MipsABIInfo(ABI::N64); }
+ static MipsABIInfo EABI() { return MipsABIInfo(ABI::EABI); }
+
+ bool IsKnown() const { return ThisABI != ABI::Unknown; }
+ bool IsO32() const { return ThisABI == ABI::O32; }
+ bool IsN32() const { return ThisABI == ABI::N32; }
+ bool IsN64() const { return ThisABI == ABI::N64; }
+ bool IsEABI() const { return ThisABI == ABI::EABI; }
+ ABI GetEnumValue() const { return ThisABI; }
+
+ /// The registers to use for byval arguments.
+ const ArrayRef<MCPhysReg> GetByValArgRegs() const;
+
+ /// The registers to use for the variable argument list.
+ const ArrayRef<MCPhysReg> GetVarArgRegs() const;
+
+ /// Obtain the size of the area allocated by the callee for arguments.
+ /// CallingConv::FastCall affects the value for O32.
+ unsigned GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const;
+
+ /// Ordering of ABI's
+ /// MipsGenSubtargetInfo.inc will use this to resolve conflicts when given
+ /// multiple ABI options.
+ bool operator<(const MipsABIInfo Other) const {
+ return ThisABI < Other.GetEnumValue();
+ }
+};
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 7f21d68bdd1d..14463fce0dc3 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -317,11 +317,11 @@ void MipsAsmPrinter::emitFrameDirective() {
/// Emit Set directives.
const char *MipsAsmPrinter::getCurrentABIString() const {
- switch (Subtarget->getTargetABI()) {
- case MipsSubtarget::O32: return "abi32";
- case MipsSubtarget::N32: return "abiN32";
- case MipsSubtarget::N64: return "abi64";
- case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+ switch (Subtarget->getABI().GetEnumValue()) {
+ case MipsABIInfo::ABI::O32: return "abi32";
+ case MipsABIInfo::ABI::N32: return "abiN32";
+ case MipsABIInfo::ABI::N64: return "abi64";
+ case MipsABIInfo::ABI::EABI: return "eabi32"; // TODO: handle eabi64
default: llvm_unreachable("Unknown Mips ABI");
}
}
@@ -471,14 +471,12 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
case 'z': {
// $0 if zero, regular printing otherwise
- if (MO.getType() != MachineOperand::MO_Immediate)
- return true;
- int64_t Val = MO.getImm();
- if (Val)
- O << Val;
- else
+ if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) {
O << "$0";
- return false;
+ return false;
+ }
+ // If not, call printOperand as normal.
+ break;
}
case 'D': // Second part of a double word register operand
case 'L': // Low order register of a double word register operand
@@ -669,9 +667,7 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
}
void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
- // TODO: Need to add -mabicalls and -mno-abicalls flags.
- // Currently we assume that -mabicalls is the default.
- bool IsABICalls = true;
+ bool IsABICalls = Subtarget->isABICalls();
if (IsABICalls) {
getTargetStreamer().emitDirectiveAbiCalls();
Reloc::Model RM = TM.getRelocationModel();
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
new file mode 100644
index 000000000000..e18cc8b1f7b3
--- /dev/null
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -0,0 +1,142 @@
+//===---- MipsCCState.cpp - CCState with Mips specific extensions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsCCState.h"
+#include "MipsSubtarget.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+/// This function returns true if CallSym is a long double emulation routine.
+static bool isF128SoftLibCall(const char *CallSym) {
+ const char *const LibCalls[] = {
+ "__addtf3", "__divtf3", "__eqtf2", "__extenddftf2",
+ "__extendsftf2", "__fixtfdi", "__fixtfsi", "__fixtfti",
+ "__fixunstfdi", "__fixunstfsi", "__fixunstfti", "__floatditf",
+ "__floatsitf", "__floattitf", "__floatunditf", "__floatunsitf",
+ "__floatuntitf", "__getf2", "__gttf2", "__letf2",
+ "__lttf2", "__multf3", "__netf2", "__powitf2",
+ "__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2",
+ "ceill", "copysignl", "cosl", "exp2l",
+ "expl", "floorl", "fmal", "fmodl",
+ "log10l", "log2l", "logl", "nearbyintl",
+ "powl", "rintl", "sinl", "sqrtl",
+ "truncl"};
+
+ const char *const *End = LibCalls + array_lengthof(LibCalls);
+
+ // Check that LibCalls is sorted alphabetically.
+ MipsTargetLowering::LTStr Comp;
+
+#ifndef NDEBUG
+ for (const char *const *I = LibCalls; I < End - 1; ++I)
+ assert(Comp(*I, *(I + 1)));
+#endif
+
+ return std::binary_search(LibCalls, End, CallSym, Comp);
+}
+
+/// This function returns true if Ty is fp128, {f128} or i128 which was
+/// originally a fp128.
+static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+ if (Ty->isFP128Ty())
+ return true;
+
+ if (Ty->isStructTy() && Ty->getStructNumElements() == 1 &&
+ Ty->getStructElementType(0)->isFP128Ty())
+ return true;
+
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
+
+ // If the Ty is i128 and the function being called is a long double emulation
+ // routine, then the original type is f128.
+ return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
+}
+
+MipsCCState::SpecialCallingConvType
+MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
+ const MipsSubtarget &Subtarget) {
+ MipsCCState::SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv;
+ if (Subtarget.inMips16HardFloat()) {
+ if (const GlobalAddressSDNode *G =
+ dyn_cast<const GlobalAddressSDNode>(Callee)) {
+ llvm::StringRef Sym = G->getGlobal()->getName();
+ Function *F = G->getGlobal()->getParent()->getFunction(Sym);
+ if (F && F->hasFnAttribute("__Mips16RetHelper")) {
+ SpecialCallingConv = Mips16RetHelperConv;
+ }
+ }
+ }
+ return SpecialCallingConv;
+}
+
+void MipsCCState::PreAnalyzeCallResultForF128(
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const TargetLowering::CallLoweringInfo &CLI) {
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ OriginalArgWasF128.push_back(
+ originalTypeIsF128(CLI.RetTy, CLI.Callee.getNode()));
+ OriginalArgWasFloat.push_back(CLI.RetTy->isFloatingPointTy());
+ }
+}
+
+/// Identify lowered values that originated from f128 arguments and record
+/// this for use by RetCC_MipsN.
+void MipsCCState::PreAnalyzeReturnForF128(
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ const MachineFunction &MF = getMachineFunction();
+ for (unsigned i = 0; i < Outs.size(); ++i) {
+ OriginalArgWasF128.push_back(
+ originalTypeIsF128(MF.getFunction()->getReturnType(), nullptr));
+ OriginalArgWasFloat.push_back(
+ MF.getFunction()->getReturnType()->isFloatingPointTy());
+ }
+}
+
+/// Identify lowered values that originated from f128 arguments and record
+/// this.
+void MipsCCState::PreAnalyzeCallOperands(
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+ const SDNode *CallNode) {
+ for (unsigned i = 0; i < Outs.size(); ++i) {
+ OriginalArgWasF128.push_back(
+ originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode));
+ OriginalArgWasFloat.push_back(
+ FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+ CallOperandIsFixed.push_back(Outs[i].IsFixed);
+ }
+}
+
+/// Identify lowered values that originated from f128 arguments and record
+/// this.
+void MipsCCState::PreAnalyzeFormalArgumentsForF128(
+ const SmallVectorImpl<ISD::InputArg> &Ins) {
+ const MachineFunction &MF = getMachineFunction();
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+
+ // SRet arguments cannot originate from f128 or {f128} returns so we just
+ // push false. We have to handle this specially since SRet arguments
+ // aren't mapped to an original argument.
+ if (Ins[i].Flags.isSRet()) {
+ OriginalArgWasF128.push_back(false);
+ OriginalArgWasFloat.push_back(false);
+ continue;
+ }
+
+ assert(Ins[i].OrigArgIndex < MF.getFunction()->arg_size());
+ std::advance(FuncArg, Ins[i].OrigArgIndex);
+
+ OriginalArgWasF128.push_back(
+ originalTypeIsF128(FuncArg->getType(), nullptr));
+ OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+ }
+}
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
new file mode 100644
index 000000000000..026b7b5e3275
--- /dev/null
+++ b/lib/Target/Mips/MipsCCState.h
@@ -0,0 +1,137 @@
+//===---- MipsCCState.h - CCState with Mips specific extensions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSCCSTATE_H
+#define MIPSCCSTATE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "MipsISelLowering.h"
+
+namespace llvm {
+class SDNode;
+class MipsSubtarget;
+
+class MipsCCState : public CCState {
+public:
+ enum SpecialCallingConvType { Mips16RetHelperConv, NoSpecialCallingConv };
+
+ /// Determine the SpecialCallingConvType for the given callee
+ static SpecialCallingConvType
+ getSpecialCallingConvForCallee(const SDNode *Callee,
+ const MipsSubtarget &Subtarget);
+
+private:
+ /// Identify lowered values that originated from f128 arguments and record
+ /// this for use by RetCC_MipsN.
+ void PreAnalyzeCallResultForF128(const SmallVectorImpl<ISD::InputArg> &Ins,
+ const TargetLowering::CallLoweringInfo &CLI);
+
+ /// Identify lowered values that originated from f128 arguments and record
+ /// this for use by RetCC_MipsN.
+ void PreAnalyzeReturnForF128(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
+ /// Identify lowered values that originated from f128 arguments and record
+ /// this.
+ void
+ PreAnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+ const SDNode *CallNode);
+
+ /// Identify lowered values that originated from f128 arguments and record
+ /// this.
+ void
+ PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
+
+ /// Records whether the value has been lowered from an f128.
+ SmallVector<bool, 4> OriginalArgWasF128;
+
+ /// Records whether the value has been lowered from float.
+ SmallVector<bool, 4> OriginalArgWasFloat;
+
+ /// Records whether the value was a fixed argument.
+ /// See ISD::OutputArg::IsFixed,
+ SmallVector<bool, 4> CallOperandIsFixed;
+
+ // Used to handle MIPS16-specific calling convention tweaks.
+ // FIXME: This should probably be a fully fledged calling convention.
+ SpecialCallingConvType SpecialCallingConv;
+
+public:
+ MipsCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
+ LLVMContext &C,
+ SpecialCallingConvType SpecialCC = NoSpecialCallingConv)
+ : CCState(CC, isVarArg, MF, TM, locs, C), SpecialCallingConv(SpecialCC) {}
+
+ void
+ AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn,
+ std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+ const SDNode *CallNode) {
+ PreAnalyzeCallOperands(Outs, FuncArgs, CallNode);
+ CCState::AnalyzeCallOperands(Outs, Fn);
+ OriginalArgWasF128.clear();
+ OriginalArgWasFloat.clear();
+ CallOperandIsFixed.clear();
+ }
+
+ // The AnalyzeCallOperands in the base class is not usable since we must
+ // provide a means of accessing ArgListEntry::IsFixed. Delete them from this
+ // class. This doesn't stop them being used via the base class though.
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) LLVM_DELETED_FUNCTION;
+ void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) LLVM_DELETED_FUNCTION;
+
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ PreAnalyzeFormalArgumentsForF128(Ins);
+ CCState::AnalyzeFormalArguments(Ins, Fn);
+ OriginalArgWasFloat.clear();
+ OriginalArgWasF128.clear();
+ }
+
+ void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn,
+ const TargetLowering::CallLoweringInfo &CLI) {
+ PreAnalyzeCallResultForF128(Ins, CLI);
+ CCState::AnalyzeCallResult(Ins, Fn);
+ OriginalArgWasFloat.clear();
+ OriginalArgWasF128.clear();
+ }
+
+ void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ PreAnalyzeReturnForF128(Outs);
+ CCState::AnalyzeReturn(Outs, Fn);
+ OriginalArgWasFloat.clear();
+ OriginalArgWasF128.clear();
+ }
+
+ bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ CCAssignFn Fn) {
+ PreAnalyzeReturnForF128(ArgsFlags);
+ bool Return = CCState::CheckReturn(ArgsFlags, Fn);
+ OriginalArgWasFloat.clear();
+ OriginalArgWasF128.clear();
+ return Return;
+ }
+
+ bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; }
+ bool WasOriginalArgFloat(unsigned ValNo) {
+ return OriginalArgWasFloat[ValNo];
+ }
+ bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
+ SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
+};
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index b1cd3c35782f..76297dcbbcf1 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -10,8 +10,42 @@
//===----------------------------------------------------------------------===//
/// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>:
- CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+class CCIfSubtarget<string F, CCAction A, string Invert = "">
+ : CCIf<!strconcat(Invert,
+ "State.getMachineFunction().getTarget()."
+ "getSubtarget<const MipsSubtarget>().",
+ F),
+ A>;
+
+// The inverse of CCIfSubtarget
+class CCIfSubtargetNot<string F, CCAction A> : CCIfSubtarget<F, A, "!">;
+
+// For soft-float, f128 values are returned in A0_64 rather than V1_64.
+def RetCC_F128SoftFloat : CallingConv<[
+ CCAssignToReg<[V0_64, A0_64]>
+]>;
+
+// For hard-float, f128 values are returned as a pair of f64's rather than a
+// pair of i64's.
+def RetCC_F128HardFloat : CallingConv<[
+ CCBitConvertToType<f64>,
+
+ // Contrary to the ABI documentation, a struct containing a long double is
+ // returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
+ // match the de facto ABI as implemented by GCC.
+ CCIfInReg<CCAssignToReg<[D0_64, D1_64]>>,
+
+ CCAssignToReg<[D0_64, D2_64]>
+]>;
+
+// Handle F128 specially since we can't identify the original type during the
+// tablegen-erated code.
+def RetCC_F128 : CallingConv<[
+ CCIfSubtarget<"abiUsesSoftFloat()",
+ CCIfType<[i64], CCDelegateTo<RetCC_F128SoftFloat>>>,
+ CCIfSubtargetNot<"abiUsesSoftFloat()",
+ CCIfType<[i64], CCDelegateTo<RetCC_F128HardFloat>>>
+]>;
//===----------------------------------------------------------------------===//
// Mips O32 Calling Convention
@@ -29,23 +63,43 @@ def RetCC_MipsO32 : CallingConv<[
// f64 arguments are returned in D0_64 and D2_64 in FP64bit mode or
// in D0 and D1 in FP32bit mode.
CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D2_64]>>>,
- CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()", CCAssignToReg<[D0, D1]>>>
+ CCIfType<[f64], CCIfSubtargetNot<"isFP64bit()", CCAssignToReg<[D0, D1]>>>
+]>;
+
+def CC_MipsO32_FP32 : CustomCallingConv;
+def CC_MipsO32_FP64 : CustomCallingConv;
+
+def CC_MipsO32_FP : CallingConv<[
+ CCIfSubtargetNot<"isFP64bit()", CCDelegateTo<CC_MipsO32_FP32>>,
+ CCIfSubtarget<"isFP64bit()", CCDelegateTo<CC_MipsO32_FP64>>
]>;
//===----------------------------------------------------------------------===//
// Mips N32/64 Calling Convention
//===----------------------------------------------------------------------===//
+def CC_MipsN_SoftFloat : CallingConv<[
+ CCAssignToRegWithShadow<[A0, A1, A2, A3,
+ T0, T1, T2, T3],
+ [D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64]>,
+ CCAssignToStack<4, 8>
+]>;
+
def CC_MipsN : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i8, i16, i32],
+ CCIfSubtargetNot<"isLittle()",
+ CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
- // Integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToRegWithShadow<[A0, A1, A2, A3,
- T0, T1, T2, T3],
- [F12, F13, F14, F15,
- F16, F17, F18, F19]>>,
+ // All integers (except soft-float integers) are promoted to 64-bit.
+ CCIfType<[i8, i16, i32],
+ CCIf<"!static_cast<MipsCCState *>(&State)->WasOriginalArgFloat(ValNo)",
+ CCPromoteToType<i64>>>,
+
+ // The only i32's we have left are soft-float arguments.
+ CCIfSubtarget<"abiUsesSoftFloat()", CCIfType<[i32], CCDelegateTo<CC_MipsN_SoftFloat>>>,
+ // Integer arguments are passed in integer registers.
CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
T0_64, T1_64, T2_64, T3_64],
[D12_64, D13_64, D14_64, D15_64,
@@ -64,29 +118,49 @@ def CC_MipsN : CallingConv<[
T0_64, T1_64, T2_64, T3_64]>>,
// All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
- CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[f32], CCAssignToStack<4, 8>>,
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
// N32/64 variable arguments.
// All arguments are passed in integer registers.
def CC_MipsN_VarArg : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // All integers are promoted to 64-bit.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
- CCIfType<[i32, f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+ CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
T0_64, T1_64, T2_64, T3_64]>>,
// All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
- CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[f32], CCAssignToStack<4, 8>>,
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
def RetCC_MipsN : CallingConv<[
- // i32 are returned in registers V0, V1
- CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+ // f128 needs to be handled similarly to f32 and f64. However, f128 is not
+ // legal and is lowered to i128 which is further lowered to a pair of i64's.
+ // This presents us with a problem for the calling convention since hard-float
+ // still needs to pass them in FPU registers, and soft-float needs to use $v0,
+ // and $a0 instead of the usual $v0, and $v1. We therefore resort to a
+ // pre-analyze (see PreAnalyzeReturnForF128()) step to pass information on
+ // whether the result was originally an f128 into the tablegen-erated code.
+ //
+ // f128 should only occur for the N64 ABI where long double is 128-bit. On
+ // N32, long double is equivalent to double.
+ CCIfType<[i64],
+ CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)",
+ CCDelegateTo<RetCC_F128>>>,
+
+ // Aggregate returns are positioned at the lowest address in the slot for
+ // both little and big-endian targets. When passing in registers, this
+ // requires that big-endian targets shift the value into the upper bits.
+ CCIfSubtarget<"isLittle()",
+ CCIfType<[i8, i16, i32, i64], CCIfInReg<CCPromoteToType<i64>>>>,
+ CCIfSubtargetNot<"isLittle()",
+ CCIfType<[i8, i16, i32, i64],
+ CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
// i64 are returned in registers V0_64, V1_64
CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>,
@@ -98,12 +172,6 @@ def RetCC_MipsN : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
]>;
-// In soft-mode, register A0_64, instead of V1_64, is used to return a long
-// double value.
-def RetCC_F128Soft : CallingConv<[
- CCIfType<[i64], CCAssignToReg<[V0_64, A0_64]>>
-]>;
-
//===----------------------------------------------------------------------===//
// Mips EABI Calling Convention
//===----------------------------------------------------------------------===//
@@ -119,11 +187,11 @@ def CC_MipsEABI : CallingConv<[
CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
- CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCIfType<[f32], CCIfSubtargetNot<"isSingleFloat()",
CCAssignToReg<[F12, F14, F16, F18]>>>,
// The first 4 double fp arguments are passed in single fp registers.
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()",
CCAssignToReg<[D6, D7, D8, D9]>>>,
// Integer values get stored in stack slots that are 4 bytes in
@@ -132,7 +200,7 @@ def CC_MipsEABI : CallingConv<[
// Integer values get stored in stack slots that are 8 bytes in
// size and 8-byte aligned.
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+ CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()", CCAssignToStack<8, 8>>>
]>;
def RetCC_MipsEABI : CallingConv<[
@@ -143,7 +211,7 @@ def RetCC_MipsEABI : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
// f64 are returned in register D0
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+ CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()", CCAssignToReg<[D0]>>>
]>;
//===----------------------------------------------------------------------===//
@@ -151,16 +219,20 @@ def RetCC_MipsEABI : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
// f64 arguments are passed in double-precision floating pointer registers.
- CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()",
- CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9]>>>,
- CCIfType<[f64], CCIfSubtarget<"isFP64bit()",
+ CCIfType<[f64], CCIfSubtargetNot<"isFP64bit()",
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6,
+ D7, D8, D9]>>>,
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCIfSubtarget<"useOddSPReg()",
CCAssignToReg<[D0_64, D1_64, D2_64, D3_64,
D4_64, D5_64, D6_64, D7_64,
D8_64, D9_64, D10_64, D11_64,
D12_64, D13_64, D14_64, D15_64,
D16_64, D17_64, D18_64,
- D19_64]>>>,
+ D19_64]>>>>,
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCIfSubtarget<"noOddSPReg()",
+ CCAssignToReg<[D0_64, D2_64, D4_64, D6_64,
+ D8_64, D10_64, D12_64, D14_64,
+ D16_64, D18_64]>>>>,
// Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
CCIfType<[f64], CCAssignToStack<8, 8>>
@@ -192,7 +264,7 @@ def CC_Mips_FastCC : CallingConv<[
// Integer arguments are passed in integer registers. All scratch registers,
// except for AT, V0 and T9, are available to be used as argument registers.
- CCIfType<[i32], CCIfSubtarget<"isNotTargetNaCl()",
+ CCIfType<[i32], CCIfSubtargetNot<"isTargetNaCl()",
CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, V1]>>>,
// In NaCl, T6, T7 and T8 are reserved and not available as argument
@@ -219,13 +291,6 @@ def CC_Mips_FastCC : CallingConv<[
CCDelegateTo<CC_MipsN_FastCC>
]>;
-//==
-
-def CC_Mips16RetHelper : CallingConv<[
- // Integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>
-]>;
-
//===----------------------------------------------------------------------===//
// Mips Calling Convention Dispatch
//===----------------------------------------------------------------------===//
@@ -237,6 +302,66 @@ def RetCC_Mips : CallingConv<[
CCDelegateTo<RetCC_MipsO32>
]>;
+def CC_Mips_ByVal : CallingConv<[
+ CCIfSubtarget<"isABI_O32()", CCIfByVal<CCPassByVal<4, 4>>>,
+ CCIfByVal<CCPassByVal<8, 8>>
+]>;
+
+def CC_Mips16RetHelper : CallingConv<[
+ CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>
+]>;
+
+def CC_Mips_FixedArg : CallingConv<[
+ // Mips16 needs special handling on some functions.
+ CCIf<"State.getCallingConv() != CallingConv::Fast",
+ CCIf<"static_cast<MipsCCState *>(&State)->getSpecialCallingConv() == "
+ "MipsCCState::Mips16RetHelperConv",
+ CCDelegateTo<CC_Mips16RetHelper>>>,
+
+ CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+ // f128 needs to be handled similarly to f32 and f64 on hard-float. However,
+ // f128 is not legal and is lowered to i128 which is further lowered to a pair
+ // of i64's.
+ // This presents us with a problem for the calling convention since hard-float
+ // still needs to pass them in FPU registers. We therefore resort to a
+ // pre-analyze (see PreAnalyzeFormalArgsForF128()) step to pass information on
+ // whether the argument was originally an f128 into the tablegen-erated code.
+ //
+ // f128 should only occur for the N64 ABI where long double is 128-bit. On
+ // N32, long double is equivalent to double.
+ CCIfType<[i64],
+ CCIfSubtargetNot<"abiUsesSoftFloat()",
+ CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)",
+ CCBitConvertToType<f64>>>>,
+
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_Mips_FastCC>>,
+
+ // FIXME: There wasn't an EABI case in the original code and it seems unlikely
+ // that it's the same as CC_MipsN
+ CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FP>>,
+ CCDelegateTo<CC_MipsN>
+]>;
+
+def CC_Mips_VarArg : CallingConv<[
+ CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+ // FIXME: There wasn't an EABI case in the original code and it seems unlikely
+ // that it's the same as CC_MipsN_VarArg
+ CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FP>>,
+ CCDelegateTo<CC_MipsN_VarArg>
+]>;
+
+def CC_Mips : CallingConv<[
+ CCIfVarArg<
+ CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)",
+ CCDelegateTo<CC_Mips_VarArg>>>,
+ CCDelegateTo<CC_Mips_FixedArg>
+]>;
+
//===----------------------------------------------------------------------===//
// Callee-saved register lists.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 80bf573a9625..76313b1d1a9d 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -343,7 +343,6 @@ namespace {
const TargetMachine &TM;
bool IsPIC;
- unsigned ABI;
const MipsSubtarget *STI;
const Mips16InstrInfo *TII;
MipsFunctionInfo *MFI;
@@ -366,8 +365,7 @@ namespace {
static char ID;
MipsConstantIslands(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_),
- ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()), STI(nullptr),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), STI(nullptr),
MF(nullptr), MCP(nullptr), PrescannedForConstants(false) {}
const char *getPassName() const override {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 40dc8e41b055..8e40668b2fa7 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -14,6 +14,7 @@
#include "MipsISelLowering.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsCCState.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
@@ -56,15 +57,6 @@ EnableMipsFastISel("mips-fast-isel", cl::Hidden,
cl::desc("Allow mips-fast-isel to be used"),
cl::init(false));
-static const MCPhysReg O32IntRegs[4] = {
- Mips::A0, Mips::A1, Mips::A2, Mips::A3
-};
-
-static const MCPhysReg Mips64IntRegs[8] = {
- Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
- Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64
-};
-
static const MCPhysReg Mips64DPRegs[8] = {
Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@@ -251,7 +243,6 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
@@ -343,7 +334,8 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM,
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
@@ -392,6 +384,11 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM,
setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2);
+ // The arguments on the stack are defined in terms of 4-byte slots on O32
+ // and 8-byte slots on N32/N64.
+ setMinStackArgumentAlignment(
+ (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 8 : 4);
+
setStackPointerRegisterToSaveRestore(Subtarget.isABI_N64() ? Mips::SP_64
: Mips::SP);
@@ -792,6 +789,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
case ISD::SETCC: return lowerSETCC(Op, DAG);
case ISD::VASTART: return lowerVASTART(Op, DAG);
+ case ISD::VAARG: return lowerVAARG(Op, DAG);
case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
@@ -1755,6 +1753,65 @@ SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV), false, false, 0);
}
+SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ SDValue Chain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ SDLoc DL(Node);
+ unsigned ArgSlotSizeInBytes =
+ (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 8 : 4;
+
+ SDValue VAListLoad = DAG.getLoad(getPointerTy(), DL, Chain, VAListPtr,
+ MachinePointerInfo(SV), false, false, false,
+ 0);
+ SDValue VAList = VAListLoad;
+
+ // Re-align the pointer if necessary.
+ // It should only ever be necessary for 64-bit types on O32 since the minimum
+ // argument alignment is the same as the maximum type alignment for N32/N64.
+ //
+ // FIXME: We currently align too often. The code generator doesn't notice
+ // when the pointer is still aligned from the last va_arg (or pair of
+ // va_args for the i64 on O32 case).
+ if (Align > getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(Align - 1,
+ VAList.getValueType()));
+
+ VAList = DAG.getNode(ISD::AND, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(-(int64_t)Align,
+ VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg.
+ unsigned ArgSizeInBytes = getDataLayout()->getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext()));
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(RoundUpToAlignment(ArgSizeInBytes, ArgSlotSizeInBytes),
+ VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer
+ Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr,
+ MachinePointerInfo(SV), false, false, 0);
+
+ // In big-endian mode we must adjust the pointer when the load size is smaller
+ // than the argument slot size. We must also reduce the known alignment to
+ // match. For example in the N64 ABI, we must add 4 bytes to the offset to get
+ // the correct half of the slot, and reduce the alignment from 8 (slot
+ // alignment) down to 4 (type alignment).
+ if (!Subtarget.isLittle() && ArgSizeInBytes < ArgSlotSizeInBytes) {
+ unsigned Adjustment = ArgSlotSizeInBytes - ArgSizeInBytes;
+ VAList = DAG.getNode(ISD::ADD, DL, VAListPtr.getValueType(), VAList,
+ DAG.getIntPtrConstant(Adjustment));
+ }
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo(), false, false,
+ false, 0);
+}
+
static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG,
bool HasExtractInsert) {
EVT TyX = Op.getOperand(0).getValueType();
@@ -2211,6 +2268,9 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State, const MCPhysReg *F64Regs) {
+ const MipsSubtarget &Subtarget =
+ State.getMachineFunction().getTarget()
+ .getSubtarget<const MipsSubtarget>();
static const unsigned IntRegsSize = 4, FloatRegsSize = 2;
@@ -2222,6 +2282,19 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
return true;
// Promote i8 and i16
+ if (ArgFlags.isInReg() && !Subtarget.isLittle()) {
+ if (LocVT == MVT::i8 || LocVT == MVT::i16 || LocVT == MVT::i32) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExtUpper;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExtUpper;
+ else
+ LocInfo = CCValAssign::AExtUpper;
+ }
+ }
+
+ // Promote i8 and i16
if (LocVT == MVT::i8 || LocVT == MVT::i16) {
LocVT = MVT::i32;
if (ArgFlags.isSExt())
@@ -2407,25 +2480,25 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC::SpecialCallingConvType SpecialCallingConv =
- getSpecialCallingConv(Callee);
- MipsCC MipsCCInfo(CallConv, Subtarget.isABI_O32(), Subtarget.isFP64bit(),
- CCInfo, SpecialCallingConv);
+ MipsCCState CCInfo(
+ CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(),
+ MipsCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget));
- MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
- Subtarget.abiUsesSoftFloat(),
- Callee.getNode(), CLI.getArgs());
+ // Allocate the reserved argument area. It seems strange to do this from the
+ // caller side but removing it breaks the frame size calculation.
+ const MipsABIInfo &ABI = Subtarget.getABI();
+ CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), Callee.getNode());
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
// Check if it's really possible to do a tail call.
if (IsTailCall)
- IsTailCall =
- isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
- *MF.getInfo<MipsFunctionInfo>());
+ IsTailCall = isEligibleForTailCallOptimization(
+ CCInfo, NextStackOffset, *MF.getInfo<MipsFunctionInfo>());
if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
@@ -2451,7 +2524,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// With EABI is it possible to have 16 args on registers.
std::deque< std::pair<unsigned, SDValue> > RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
+
+ CCInfo.rewindByValRegsInfo();
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -2459,23 +2533,30 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCValAssign &VA = ArgLocs[i];
MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ bool UseUpperBits = false;
// ByVal Arg.
if (Flags.isByVal()) {
+ unsigned FirstByValReg, LastByValReg;
+ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
- assert(ByValArg != MipsCCInfo.byval_end());
+ assert(ByValIdx < CCInfo.getInRegsParamsCount());
assert(!IsTailCall &&
"Do not tail-call optimize if there is a byval argument.");
passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
- MipsCCInfo, *ByValArg, Flags, Subtarget.isLittle());
- ++ByValArg;
+ FirstByValReg, LastByValReg, Flags, Subtarget.isLittle(),
+ VA);
+ CCInfo.nextInRegsParam();
continue;
}
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
+ default:
+ llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
if (VA.isRegLoc()) {
if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
@@ -2497,17 +2578,37 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
+ break;
+ case CCValAssign::SExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
break;
+ case CCValAssign::ZExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
break;
+ case CCValAssign::AExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
break;
}
+ if (UseUpperBits) {
+ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+ Arg = DAG.getNode(
+ ISD::SHL, DL, VA.getLocVT(), Arg,
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ }
+
// Arguments that can be passed on register must be kept at
// RegsToPass vector
if (VA.isRegLoc()) {
@@ -2595,39 +2696,68 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg,
- Ins, DL, DAG, InVals, CLI.Callee.getNode(), CLI.RetTy);
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ InVals, CLI);
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
-SDValue
-MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const SDNode *CallNode,
- const Type *RetTy) const {
+SDValue MipsTargetLowering::LowerCallResult(
+ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ TargetLowering::CallLoweringInfo &CLI) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, Subtarget.isABI_O32(), Subtarget.isFP64bit(),
- CCInfo);
-
- MipsCCInfo.analyzeCallResult(Ins, Subtarget.abiUsesSoftFloat(),
- CallNode, RetTy);
+ MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
RVLocs[i].getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
- if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
- Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getValVT(), Val);
+ if (VA.isUpperBitsInLoc()) {
+ unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits();
+ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+ unsigned Shift =
+ VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+ Val = DAG.getNode(
+ Shift, DL, VA.getLocVT(), Val,
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ }
+
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ break;
+ case CCValAssign::AExt:
+ case CCValAssign::AExtUpper:
+ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ case CCValAssign::ZExtUpper:
+ Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+ break;
+ case CCValAssign::SExt:
+ case CCValAssign::SExtUpper:
+ Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+ break;
+ }
InVals.push_back(Val);
}
@@ -2635,6 +2765,60 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
return Chain;
}
+static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA,
+ EVT ArgVT, SDLoc DL, SelectionDAG &DAG) {
+ MVT LocVT = VA.getLocVT();
+ EVT ValVT = VA.getValVT();
+
+ // Shift into the upper bits if necessary.
+ switch (VA.getLocInfo()) {
+ default:
+ break;
+ case CCValAssign::AExtUpper:
+ case CCValAssign::SExtUpper:
+ case CCValAssign::ZExtUpper: {
+ unsigned ValSizeInBits = ArgVT.getSizeInBits();
+ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+ unsigned Opcode =
+ VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+ Val = DAG.getNode(
+ Opcode, DL, VA.getLocVT(), Val,
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ break;
+ }
+ }
+
+ // If this is an value smaller than the argument slot size (32-bit for O32,
+ // 64-bit for N32/N64), it has been promoted in some way to the argument slot
+ // size. Extract the value and insert any appropriate assertions regarding
+ // sign/zero extension.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::AExtUpper:
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::SExtUpper:
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::ZExtUpper:
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+ break;
+ }
+
+ return Val;
+}
+
//===----------------------------------------------------------------------===//
// Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -2659,20 +2843,19 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, Subtarget.isABI_O32(), Subtarget.isFP64bit(),
- CCInfo);
+ MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ const MipsABIInfo &ABI = Subtarget.getABI();
+ CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
- bool UseSoftFloat = Subtarget.abiUsesSoftFloat();
- MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
- MipsCCInfo.hasByValArg());
+ CCInfo.getInRegsParamsCount() > 0);
unsigned CurArgIdx = 0;
- MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
+ CCInfo.rewindByValRegsInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -2683,12 +2866,16 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
bool IsRegLoc = VA.isRegLoc();
if (Flags.isByVal()) {
+ unsigned FirstByValReg, LastByValReg;
+ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
- assert(ByValArg != MipsCCInfo.byval_end());
+ assert(ByValIdx < CCInfo.getInRegsParamsCount());
copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
- MipsCCInfo, *ByValArg);
- ++ByValArg;
+ FirstByValReg, LastByValReg, VA, CCInfo);
+ CCInfo.nextInRegsParam();
continue;
}
@@ -2703,20 +2890,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
- // If this is an 8 or 16-bit value, it has been passed promoted
- // to 32 bits. Insert an assert[sz]ext to capture this, then
- // truncate to the right size.
- if (VA.getLocInfo() != CCValAssign::Full) {
- unsigned Opcode = 0;
- if (VA.getLocInfo() == CCValAssign::SExt)
- Opcode = ISD::AssertSext;
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- Opcode = ISD::AssertZext;
- if (Opcode)
- ArgValue = DAG.getNode(Opcode, DL, RegVT, ArgValue,
- DAG.getValueType(ValVT));
- ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
- }
+ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
// Handle floating point arguments passed in integer registers and
// long double arguments passed in floating point registers.
@@ -2737,21 +2911,34 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
+ MVT LocVT = VA.getLocVT();
+
+ if (Subtarget.isABI_O32()) {
+ // We ought to be able to use LocVT directly but O32 sets it to i32
+ // when allocating floating point values to integer registers.
+ // This shouldn't influence how we load the value into registers unless
+ // we are targetting softfloat.
+ if (VA.getValVT().isFloatingPoint() && !Subtarget.abiUsesSoftFloat())
+ LocVT = VA.getValVT();
+ }
// sanity check
assert(VA.isMemLoc());
// The stack pointer offset is relative to the caller stack frame.
- int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
+ int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- SDValue Load = DAG.getLoad(ValVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
- InVals.push_back(Load);
- OutChains.push_back(Load.getValue(1));
+ SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ OutChains.push_back(ArgValue.getValue(1));
+
+ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
+
+ InVals.push_back(ArgValue);
}
}
@@ -2773,7 +2960,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
}
if (IsVarArg)
- writeVarArgRegs(OutChains, MipsCCInfo, Chain, DL, DAG);
+ writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo);
// All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens when on varg functions
@@ -2795,8 +2982,8 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(),
- RVLocs, Context);
+ MipsCCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
@@ -2812,14 +2999,11 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
- *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, Subtarget.isABI_O32(), Subtarget.isFP64bit(),
- CCInfo);
+ MipsCCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
+ *DAG.getContext());
// Analyze return values.
- MipsCCInfo.analyzeReturn(Outs, Subtarget.abiUsesSoftFloat(),
- MF.getFunction()->getReturnType());
+ CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -2829,9 +3013,43 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
SDValue Val = OutVals[i];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
+ bool UseUpperBits = false;
- if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
- Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val);
+ break;
+ case CCValAssign::AExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val);
+ break;
+ case CCValAssign::ZExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val);
+ break;
+ case CCValAssign::SExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val);
+ break;
+ }
+
+ if (UseUpperBits) {
+ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+ Val = DAG.getNode(
+ ISD::SHL, DL, VA.getLocVT(), Val,
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ }
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
@@ -3267,285 +3485,27 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const {
return TargetLowering::getJumpTableEncoding();
}
-/// This function returns true if CallSym is a long double emulation routine.
-static bool isF128SoftLibCall(const char *CallSym) {
- const char *const LibCalls[] =
- {"__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", "__extendsftf2",
- "__fixtfdi", "__fixtfsi", "__fixtfti", "__fixunstfdi", "__fixunstfsi",
- "__fixunstfti", "__floatditf", "__floatsitf", "__floattitf",
- "__floatunditf", "__floatunsitf", "__floatuntitf", "__getf2", "__gttf2",
- "__letf2", "__lttf2", "__multf3", "__netf2", "__powitf2", "__subtf3",
- "__trunctfdf2", "__trunctfsf2", "__unordtf2",
- "ceill", "copysignl", "cosl", "exp2l", "expl", "floorl", "fmal", "fmodl",
- "log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl",
- "truncl"};
-
- const char *const *End = LibCalls + array_lengthof(LibCalls);
-
- // Check that LibCalls is sorted alphabetically.
- MipsTargetLowering::LTStr Comp;
-
-#ifndef NDEBUG
- for (const char *const *I = LibCalls; I < End - 1; ++I)
- assert(Comp(*I, *(I + 1)));
-#endif
-
- return std::binary_search(LibCalls, End, CallSym, Comp);
-}
-
-/// This function returns true if Ty is fp128 or i128 which was originally a
-/// fp128.
-static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
- if (Ty->isFP128Ty())
- return true;
-
- const ExternalSymbolSDNode *ES =
- dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
-
- // If the Ty is i128 and the function being called is a long double emulation
- // routine, then the original type is f128.
- return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
-}
-
-MipsTargetLowering::MipsCC::SpecialCallingConvType
- MipsTargetLowering::getSpecialCallingConv(SDValue Callee) const {
- MipsCC::SpecialCallingConvType SpecialCallingConv =
- MipsCC::NoSpecialCallingConv;
- if (Subtarget.inMips16HardFloat()) {
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- llvm::StringRef Sym = G->getGlobal()->getName();
- Function *F = G->getGlobal()->getParent()->getFunction(Sym);
- if (F && F->hasFnAttribute("__Mips16RetHelper")) {
- SpecialCallingConv = MipsCC::Mips16RetHelperConv;
- }
- }
- }
- return SpecialCallingConv;
-}
-
-MipsTargetLowering::MipsCC::MipsCC(
- CallingConv::ID CC, bool IsO32_, bool IsFP64_, CCState &Info,
- MipsCC::SpecialCallingConvType SpecialCallingConv_)
- : CCInfo(Info), CallConv(CC), IsO32(IsO32_), IsFP64(IsFP64_),
- SpecialCallingConv(SpecialCallingConv_){
- // Pre-allocate reserved argument area.
- CCInfo.AllocateStack(reservedArgArea(), 1);
-}
-
-
-void MipsTargetLowering::MipsCC::
-analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
- bool IsVarArg, bool IsSoftFloat, const SDNode *CallNode,
- std::vector<ArgListEntry> &FuncArgs) {
- assert((CallConv != CallingConv::Fast || !IsVarArg) &&
- "CallingConv::Fast shouldn't be used for vararg functions.");
-
- unsigned NumOpnds = Args.size();
- llvm::CCAssignFn *FixedFn = fixedArgFn(), *VarFn = varArgFn();
-
- for (unsigned I = 0; I != NumOpnds; ++I) {
- MVT ArgVT = Args[I].VT;
- ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
- bool R;
-
- if (ArgFlags.isByVal()) {
- handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
- continue;
- }
-
- if (IsVarArg && !Args[I].IsFixed)
- R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
- else {
- MVT RegVT = getRegVT(ArgVT, FuncArgs[Args[I].OrigArgIndex].Ty, CallNode,
- IsSoftFloat);
- R = FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo);
- }
-
- if (R) {
-#ifndef NDEBUG
- dbgs() << "Call operand #" << I << " has unhandled type "
- << EVT(ArgVT).getEVTString();
-#endif
- llvm_unreachable(nullptr);
- }
- }
-}
-
-void MipsTargetLowering::MipsCC::
-analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
- bool IsSoftFloat, Function::const_arg_iterator FuncArg) {
- unsigned NumArgs = Args.size();
- llvm::CCAssignFn *FixedFn = fixedArgFn();
- unsigned CurArgIdx = 0;
-
- for (unsigned I = 0; I != NumArgs; ++I) {
- MVT ArgVT = Args[I].VT;
- ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
- std::advance(FuncArg, Args[I].OrigArgIndex - CurArgIdx);
- CurArgIdx = Args[I].OrigArgIndex;
-
- if (ArgFlags.isByVal()) {
- handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
- continue;
- }
-
- MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), nullptr, IsSoftFloat);
-
- if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo))
- continue;
-
-#ifndef NDEBUG
- dbgs() << "Formal Arg #" << I << " has unhandled type "
- << EVT(ArgVT).getEVTString();
-#endif
- llvm_unreachable(nullptr);
- }
-}
-
-template<typename Ty>
-void MipsTargetLowering::MipsCC::
-analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
- const SDNode *CallNode, const Type *RetTy) const {
- CCAssignFn *Fn;
-
- if (IsSoftFloat && originalTypeIsF128(RetTy, CallNode))
- Fn = RetCC_F128Soft;
- else
- Fn = RetCC_Mips;
-
- for (unsigned I = 0, E = RetVals.size(); I < E; ++I) {
- MVT VT = RetVals[I].VT;
- ISD::ArgFlagsTy Flags = RetVals[I].Flags;
- MVT RegVT = this->getRegVT(VT, RetTy, CallNode, IsSoftFloat);
-
- if (Fn(I, VT, RegVT, CCValAssign::Full, Flags, this->CCInfo)) {
-#ifndef NDEBUG
- dbgs() << "Call result #" << I << " has unhandled type "
- << EVT(VT).getEVTString() << '\n';
-#endif
- llvm_unreachable(nullptr);
- }
- }
-}
-
-void MipsTargetLowering::MipsCC::
-analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat,
- const SDNode *CallNode, const Type *RetTy) const {
- analyzeReturn(Ins, IsSoftFloat, CallNode, RetTy);
-}
-
-void MipsTargetLowering::MipsCC::
-analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
- const Type *RetTy) const {
- analyzeReturn(Outs, IsSoftFloat, nullptr, RetTy);
-}
-
-void MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
- MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags) {
- assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0.");
-
- struct ByValArgInfo ByVal;
- unsigned RegSize = regSize();
- unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize);
- unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize),
- RegSize * 2);
-
- if (useRegsForByval())
- allocateRegs(ByVal, ByValSize, Align);
-
- // Allocate space on caller's stack.
- ByVal.Address = CCInfo.AllocateStack(ByValSize - RegSize * ByVal.NumRegs,
- Align);
- CCInfo.addLoc(CCValAssign::getMem(ValNo, ValVT, ByVal.Address, LocVT,
- LocInfo));
- ByValArgs.push_back(ByVal);
-}
-
-unsigned MipsTargetLowering::MipsCC::numIntArgRegs() const {
- return IsO32 ? array_lengthof(O32IntRegs) : array_lengthof(Mips64IntRegs);
-}
-
-unsigned MipsTargetLowering::MipsCC::reservedArgArea() const {
- return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0;
-}
-
-const MCPhysReg *MipsTargetLowering::MipsCC::intArgRegs() const {
- return IsO32 ? O32IntRegs : Mips64IntRegs;
-}
-
-llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const {
- if (CallConv == CallingConv::Fast)
- return CC_Mips_FastCC;
-
- if (SpecialCallingConv == Mips16RetHelperConv)
- return CC_Mips16RetHelper;
- return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN;
-}
-
-llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
- return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN_VarArg;
-}
-
-const MCPhysReg *MipsTargetLowering::MipsCC::shadowRegs() const {
- return IsO32 ? O32IntRegs : Mips64DPRegs;
-}
-
-void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
- unsigned ByValSize,
- unsigned Align) {
- unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs();
- const MCPhysReg *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
- assert(!(ByValSize % RegSize) && !(Align % RegSize) &&
- "Byval argument's size and alignment should be a multiple of"
- "RegSize.");
-
- ByVal.FirstIdx = CCInfo.getFirstUnallocated(IntArgRegs, NumIntArgRegs);
-
- // If Align > RegSize, the first arg register must be even.
- if ((Align > RegSize) && (ByVal.FirstIdx % 2)) {
- CCInfo.AllocateReg(IntArgRegs[ByVal.FirstIdx], ShadowRegs[ByVal.FirstIdx]);
- ++ByVal.FirstIdx;
- }
-
- // Mark the registers allocated.
- for (unsigned I = ByVal.FirstIdx; ByValSize && (I < NumIntArgRegs);
- ByValSize -= RegSize, ++I, ++ByVal.NumRegs)
- CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]);
-}
-
-MVT MipsTargetLowering::MipsCC::getRegVT(MVT VT, const Type *OrigTy,
- const SDNode *CallNode,
- bool IsSoftFloat) const {
- if (IsSoftFloat || IsO32)
- return VT;
-
- // Check if the original type was fp128.
- if (originalTypeIsF128(OrigTy, CallNode)) {
- assert(VT == MVT::i64);
- return MVT::f64;
- }
-
- return VT;
-}
-
-void MipsTargetLowering::
-copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
- SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
- SmallVectorImpl<SDValue> &InVals, const Argument *FuncArg,
- const MipsCC &CC, const ByValArgInfo &ByVal) const {
+void MipsTargetLowering::copyByValRegs(
+ SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains, SelectionDAG &DAG,
+ const ISD::ArgFlagsTy &Flags, SmallVectorImpl<SDValue> &InVals,
+ const Argument *FuncArg, unsigned FirstReg, unsigned LastReg,
+ const CCValAssign &VA, MipsCCState &State) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned RegAreaSize = ByVal.NumRegs * CC.regSize();
+ unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes();
+ unsigned NumRegs = LastReg - FirstReg;
+ unsigned RegAreaSize = NumRegs * GPRSizeInBytes;
unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize);
int FrameObjOffset;
+ const MipsABIInfo &ABI = Subtarget.getABI();
+ ArrayRef<MCPhysReg> ByValArgRegs = ABI.GetByValArgRegs();
if (RegAreaSize)
- FrameObjOffset = (int)CC.reservedArgArea() -
- (int)((CC.numIntArgRegs() - ByVal.FirstIdx) * CC.regSize());
+ FrameObjOffset =
+ (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) -
+ (int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes);
else
- FrameObjOffset = ByVal.Address;
+ FrameObjOffset = VA.getLocMemOffset();
// Create frame object.
EVT PtrTy = getPointerTy();
@@ -3553,17 +3513,17 @@ copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
InVals.push_back(FIN);
- if (!ByVal.NumRegs)
+ if (!NumRegs)
return;
// Copy arg registers.
- MVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
+ MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
- for (unsigned I = 0; I < ByVal.NumRegs; ++I) {
- unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I];
+ for (unsigned I = 0; I < NumRegs; ++I) {
+ unsigned ArgReg = ByValArgRegs[FirstReg + I];
unsigned VReg = addLiveIn(MF, ArgReg, RC);
- unsigned Offset = I * CC.regSize();
+ unsigned Offset = I * GPRSizeInBytes;
SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
DAG.getConstant(Offset, PtrTy));
SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy),
@@ -3574,34 +3534,34 @@ copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
}
// Copy byVal arg to registers and stack.
-void MipsTargetLowering::
-passByValArg(SDValue Chain, SDLoc DL,
- std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
- SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
- MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
- const MipsCC &CC, const ByValArgInfo &ByVal,
- const ISD::ArgFlagsTy &Flags, bool isLittle) const {
+void MipsTargetLowering::passByValArg(
+ SDValue Chain, SDLoc DL,
+ std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
+ SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg,
+ unsigned LastReg, const ISD::ArgFlagsTy &Flags, bool isLittle,
+ const CCValAssign &VA) const {
unsigned ByValSizeInBytes = Flags.getByValSize();
unsigned OffsetInBytes = 0; // From beginning of struct
- unsigned RegSizeInBytes = CC.regSize();
+ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes);
EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
+ unsigned NumRegs = LastReg - FirstReg;
- if (ByVal.NumRegs) {
- const MCPhysReg *ArgRegs = CC.intArgRegs();
- bool LeftoverBytes = (ByVal.NumRegs * RegSizeInBytes > ByValSizeInBytes);
+ if (NumRegs) {
+ const ArrayRef<MCPhysReg> ArgRegs = Subtarget.getABI().GetByValArgRegs();
+ bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes);
unsigned I = 0;
// Copy words to registers.
- for (; I < ByVal.NumRegs - LeftoverBytes;
- ++I, OffsetInBytes += RegSizeInBytes) {
+ for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) {
SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
DAG.getConstant(OffsetInBytes, PtrTy));
SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
MachinePointerInfo(), false, false, false,
Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
- unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
+ unsigned ArgReg = ArgRegs[FirstReg + I];
RegsToPass.push_back(std::make_pair(ArgReg, LoadVal));
}
@@ -3611,9 +3571,6 @@ passByValArg(SDValue Chain, SDLoc DL,
// Copy the remainder of the byval argument with sub-word loads and shifts.
if (LeftoverBytes) {
- assert((ByValSizeInBytes > OffsetInBytes) &&
- (ByValSizeInBytes < OffsetInBytes + RegSizeInBytes) &&
- "Size of the remainder should be smaller than RegSizeInBytes.");
SDValue Val;
for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0;
@@ -3652,7 +3609,7 @@ passByValArg(SDValue Chain, SDLoc DL,
Alignment = std::min(Alignment, LoadSizeInBytes);
}
- unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
+ unsigned ArgReg = ArgRegs[FirstReg + I];
RegsToPass.push_back(std::make_pair(ArgReg, Val));
return;
}
@@ -3663,7 +3620,7 @@ passByValArg(SDValue Chain, SDLoc DL,
SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
DAG.getConstant(OffsetInBytes, PtrTy));
SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
- DAG.getIntPtrConstant(ByVal.Address));
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
MachinePointerInfo(), MachinePointerInfo());
@@ -3671,14 +3628,13 @@ passByValArg(SDValue Chain, SDLoc DL,
}
void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
- const MipsCC &CC, SDValue Chain,
- SDLoc DL, SelectionDAG &DAG) const {
- unsigned NumRegs = CC.numIntArgRegs();
- const MCPhysReg *ArgRegs = CC.intArgRegs();
- const CCState &CCInfo = CC.getCCInfo();
- unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs);
- unsigned RegSize = CC.regSize();
- MVT RegTy = MVT::getIntegerVT(RegSize * 8);
+ SDValue Chain, SDLoc DL,
+ SelectionDAG &DAG,
+ CCState &State) const {
+ const ArrayRef<MCPhysReg> ArgRegs = Subtarget.getABI().GetVarArgRegs();
+ unsigned Idx = State.getFirstUnallocated(ArgRegs.data(), ArgRegs.size());
+ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
+ MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -3687,24 +3643,30 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
// Offset of the first variable argument from stack pointer.
int VaArgOffset;
- if (NumRegs == Idx)
- VaArgOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), RegSize);
- else
- VaArgOffset = (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx));
+ if (ArgRegs.size() == Idx)
+ VaArgOffset =
+ RoundUpToAlignment(State.getNextStackOffset(), RegSizeInBytes);
+ else {
+ const MipsABIInfo &ABI = Subtarget.getABI();
+ VaArgOffset =
+ (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) -
+ (int)(RegSizeInBytes * (ArgRegs.size() - Idx));
+ }
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- int FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
+ int FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
MipsFI->setVarArgsFrameIndex(FI);
// Copy the integer registers that have not been used for argument passing
// to the argument register save area. For O32, the save area is allocated
// in the caller's stack frame, while for N32/64, it is allocated in the
// callee's stack frame.
- for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) {
+ for (unsigned I = Idx; I < ArgRegs.size();
+ ++I, VaArgOffset += RegSizeInBytes) {
unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
- FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
+ FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
MachinePointerInfo(), false, false, 0);
@@ -3713,3 +3675,49 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
OutChains.push_back(Store);
}
}
+
+void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+ unsigned Align) const {
+ MachineFunction &MF = State->getMachineFunction();
+ const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+
+ assert(Size && "Byval argument's size shouldn't be 0.");
+
+ Align = std::min(Align, TFL->getStackAlignment());
+
+ unsigned FirstReg = 0;
+ unsigned NumRegs = 0;
+
+ if (State->getCallingConv() != CallingConv::Fast) {
+ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
+ const ArrayRef<MCPhysReg> IntArgRegs = Subtarget.getABI().GetByValArgRegs();
+ // FIXME: The O32 case actually describes no shadow registers.
+ const MCPhysReg *ShadowRegs =
+ Subtarget.isABI_O32() ? IntArgRegs.data() : Mips64DPRegs;
+
+ // We used to check the size as well but we can't do that anymore since
+ // CCState::HandleByVal() rounds up the size after calling this function.
+ assert(!(Align % RegSizeInBytes) &&
+ "Byval argument's alignment should be a multiple of"
+ "RegSizeInBytes.");
+
+ FirstReg = State->getFirstUnallocated(IntArgRegs.data(), IntArgRegs.size());
+
+ // If Align > RegSizeInBytes, the first arg register must be even.
+ // FIXME: This condition happens to do the right thing but it's not the
+ // right way to test it. We want to check that the stack frame offset
+ // of the register is aligned.
+ if ((Align > RegSizeInBytes) && (FirstReg % 2)) {
+ State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]);
+ ++FirstReg;
+ }
+
+ // Mark the registers allocated.
+ Size = RoundUpToAlignment(Size, RegSizeInBytes);
+ for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size());
+ Size -= RegSizeInBytes, ++I, ++NumRegs)
+ State->AllocateReg(IntArgRegs[I], ShadowRegs[I]);
+ }
+
+ State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs);
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 10e4e0b48644..23163b73e0a4 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -210,6 +210,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
class MipsFunctionInfo;
class MipsSubtarget;
+ class MipsCCState;
class MipsTargetLowering : public TargetLowering {
bool isMicroMips;
@@ -259,6 +260,8 @@ namespace llvm {
}
};
+ void HandleByVal(CCState *, unsigned &, unsigned) const override;
+
protected:
SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
@@ -338,101 +341,6 @@ namespace llvm {
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
- /// ByValArgInfo - Byval argument information.
- struct ByValArgInfo {
- unsigned FirstIdx; // Index of the first register used.
- unsigned NumRegs; // Number of registers used for this argument.
- unsigned Address; // Offset of the stack area used to pass this argument.
-
- ByValArgInfo() : FirstIdx(0), NumRegs(0), Address(0) {}
- };
-
- /// MipsCC - This class provides methods used to analyze formal and call
- /// arguments and inquire about calling convention information.
- class MipsCC {
- public:
- enum SpecialCallingConvType {
- Mips16RetHelperConv, NoSpecialCallingConv
- };
-
- MipsCC(CallingConv::ID CallConv, bool IsO32, bool IsFP64, CCState &Info,
- SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv);
-
-
- void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
- bool IsVarArg, bool IsSoftFloat,
- const SDNode *CallNode,
- std::vector<ArgListEntry> &FuncArgs);
- void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
- bool IsSoftFloat,
- Function::const_arg_iterator FuncArg);
-
- void analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
- bool IsSoftFloat, const SDNode *CallNode,
- const Type *RetTy) const;
-
- void analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
- bool IsSoftFloat, const Type *RetTy) const;
-
- const CCState &getCCInfo() const { return CCInfo; }
-
- /// hasByValArg - Returns true if function has byval arguments.
- bool hasByValArg() const { return !ByValArgs.empty(); }
-
- /// regSize - Size (in number of bits) of integer registers.
- unsigned regSize() const { return IsO32 ? 4 : 8; }
-
- /// numIntArgRegs - Number of integer registers available for calls.
- unsigned numIntArgRegs() const;
-
- /// reservedArgArea - The size of the area the caller reserves for
- /// register arguments. This is 16-byte if ABI is O32.
- unsigned reservedArgArea() const;
-
- /// Return pointer to array of integer argument registers.
- const MCPhysReg *intArgRegs() const;
-
- typedef SmallVectorImpl<ByValArgInfo>::const_iterator byval_iterator;
- byval_iterator byval_begin() const { return ByValArgs.begin(); }
- byval_iterator byval_end() const { return ByValArgs.end(); }
-
- private:
- void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags);
-
- /// useRegsForByval - Returns true if the calling convention allows the
- /// use of registers to pass byval arguments.
- bool useRegsForByval() const { return CallConv != CallingConv::Fast; }
-
- /// Return the function that analyzes fixed argument list functions.
- llvm::CCAssignFn *fixedArgFn() const;
-
- /// Return the function that analyzes variable argument list functions.
- llvm::CCAssignFn *varArgFn() const;
-
- const MCPhysReg *shadowRegs() const;
-
- void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
- unsigned Align);
-
- /// Return the type of the register which is used to pass an argument or
- /// return a value. This function returns f64 if the argument is an i64
- /// value which has been generated as a result of softening an f128 value.
- /// Otherwise, it just returns VT.
- MVT getRegVT(MVT VT, const Type *OrigTy, const SDNode *CallNode,
- bool IsSoftFloat) const;
-
- template<typename Ty>
- void analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
- const SDNode *CallNode, const Type *RetTy) const;
-
- CCState &CCInfo;
- CallingConv::ID CallConv;
- bool IsO32, IsFP64;
- SpecialCallingConvType SpecialCallingConv;
- SmallVector<ByValArgInfo, 2> ByValArgs;
- };
protected:
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
@@ -461,14 +369,12 @@ namespace llvm {
SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
- MipsCC::SpecialCallingConvType getSpecialCallingConv(SDValue Callee) const;
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const SDNode *CallNode, const Type *RetTy) const;
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ TargetLowering::CallLoweringInfo &CLI) const;
// Lower Operand specifics
SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
@@ -482,6 +388,7 @@ namespace llvm {
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
@@ -497,33 +404,34 @@ namespace llvm {
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
virtual bool
- isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ isEligibleForTailCallOptimization(const CCState &CCInfo,
unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const = 0;
+ const MipsFunctionInfo &FI) const = 0;
/// copyByValArg - Copy argument registers which were used to pass a byval
/// argument to the stack. Create a stack frame object for the byval
/// argument.
- void copyByValRegs(SDValue Chain, SDLoc DL,
- std::vector<SDValue> &OutChains, SelectionDAG &DAG,
- const ISD::ArgFlagsTy &Flags,
+ void copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
+ SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
SmallVectorImpl<SDValue> &InVals,
- const Argument *FuncArg,
- const MipsCC &CC, const ByValArgInfo &ByVal) const;
+ const Argument *FuncArg, unsigned FirstReg,
+ unsigned LastReg, const CCValAssign &VA,
+ MipsCCState &State) const;
/// passByValArg - Pass a byval argument in registers or on stack.
void passByValArg(SDValue Chain, SDLoc DL,
- std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
- const MipsCC &CC, const ByValArgInfo &ByVal,
- const ISD::ArgFlagsTy &Flags, bool isLittle) const;
+ unsigned FirstReg, unsigned LastReg,
+ const ISD::ArgFlagsTy &Flags, bool isLittle,
+ const CCValAssign &VA) const;
/// writeVarArgRegs - Write variable function arguments passed in registers
/// to the stack. Also create a stack frame object for the first variable
/// argument.
- void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC,
- SDValue Chain, SDLoc DL, SelectionDAG &DAG) const;
+ void writeVarArgRegs(std::vector<SDValue> &OutChains, SDValue Chain,
+ SDLoc DL, SelectionDAG &DAG, CCState &State) const;
SDValue
LowerFormalArguments(SDValue Chain,
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 29d8e30be483..1100e1e0323a 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -178,6 +178,38 @@ class SW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
let mayStore = 1;
}
+class SW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem2";
+ let mayStore = 1;
+}
+
+class LW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem2";
+ let mayLoad = 1;
+}
+
+class SW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem3";
+ let mayStore = 1;
+}
+
+class LW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem3";
+ let mayLoad = 1;
+}
+
class MADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
SDPatternOperator OpNode = null_frag> :
InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
@@ -407,24 +439,24 @@ def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>,
// Cop2 Memory Instructions
// FIXME: These aren't really FPU instructions and as such don't belong in this
// file
-def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
+def LWC2 : LW_FT2<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
ISA_MIPS1_NOT_32R6_64R6;
-def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
+def SWC2 : SW_FT2<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
ISA_MIPS1_NOT_32R6_64R6;
-def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
+def LDC2 : LW_FT2<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
ISA_MIPS2_NOT_32R6_64R6;
-def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
+def SDC2 : SW_FT2<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
ISA_MIPS2_NOT_32R6_64R6;
// Cop3 Memory Instructions
// FIXME: These aren't really FPU instructions and as such don't belong in this
// file
let DecoderNamespace = "COP3_" in {
- def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
- def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
- def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
+ def LWC3 : LW_FT3<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
+ def SWC3 : SW_FT3<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
+ def LDC3 : LW_FT3<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
ISA_MIPS2;
- def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
+ def SDC3 : SW_FT3<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
ISA_MIPS2;
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 8e9472c2d9d8..26ecfbf6f4e6 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1414,13 +1414,15 @@ def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>;
def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>;
def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>;
-class CacheOp<string instr_asm, Operand MemOpnd, RegisterOperand GPROpnd> :
+class CacheOp<string instr_asm, Operand MemOpnd> :
InstSE<(outs), (ins MemOpnd:$addr, uimm5:$hint),
- !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther>;
+ !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther> {
+ let DecoderMethod = "DecodeCacheOp";
+}
-def CACHE : CacheOp<"cache", mem, GPR32Opnd>, CACHEOP_FM<0b101111>,
+def CACHE : CacheOp<"cache", mem>, CACHEOP_FM<0b101111>,
INSN_MIPS3_32_NOT_32R6_64R6;
-def PREF : CacheOp<"pref", mem, GPR32Opnd>, CACHEOP_FM<0b110011>,
+def PREF : CacheOp<"pref", mem>, CACHEOP_FM<0b110011>,
INSN_MIPS3_32_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 27110b6e870e..5790ed6053c8 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -64,8 +64,8 @@ namespace {
MipsLongBranch(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm),
IsPIC(TM.getRelocationModel() == Reloc::PIC_),
- ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
- LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 :
+ ABI(TM.getSubtarget<MipsSubtarget>().getABI()),
+ LongBranchSeqSize(!IsPIC ? 2 : (ABI.IsN64() ? 10 :
(!TM.getSubtarget<MipsSubtarget>().isTargetNaCl() ? 9 : 10))) {}
const char *getPassName() const override {
@@ -86,7 +86,7 @@ namespace {
MachineFunction *MF;
SmallVector<MBBInfo, 16> MBBInfos;
bool IsPIC;
- unsigned ABI;
+ MipsABIInfo ABI;
unsigned LongBranchSeqSize;
};
@@ -273,7 +273,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
unsigned BalOp = Subtarget.hasMips32r6() ? Mips::BAL : Mips::BAL_BR;
- if (ABI != MipsSubtarget::N64) {
+ if (!ABI.IsN64()) {
// $longbr:
// addiu $sp, $sp, -8
// sw $ra, 0($sp)
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 084449bba59c..a6169705fe01 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -149,6 +149,12 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I)
Reserved.set(ReservedGPR64[I]);
+ // For mno-abicalls, GP is a program invariant!
+ if (!Subtarget.isABICalls()) {
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::GP_64);
+ }
+
if (Subtarget.isFP64bit()) {
// Reserve all registers in AFGR64.
for (RegIter Reg = Mips::AFGR64RegClass.begin(),
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index d0a17cd834a0..4825e1e569ee 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -325,6 +325,8 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
// We re-use the same spill slot each time so that the stack frame doesn't
// grow too much in functions with a large number of moves.
int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC2);
+ if (!Subtarget.isLittle())
+ std::swap(LoReg, HiReg);
TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, &TRI,
0);
TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, &TRI,
@@ -369,6 +371,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
unsigned DstReg = I->getOperand(0).getReg();
unsigned SrcReg = I->getOperand(1).getReg();
unsigned N = I->getOperand(2).getImm();
+ int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N));
// It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
// the cases where mfhc1 is not available). 64-bit architectures and
@@ -385,7 +388,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC);
TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC, &TRI,
0);
- TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, N * 4);
+ TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, Offset);
return true;
}
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 8173615cdcb5..d7452e281e36 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1167,15 +1167,14 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
}
-bool MipsSETargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
+bool MipsSETargetLowering::isEligibleForTailCallOptimization(
+ const CCState &CCInfo, unsigned NextStackOffset,
+ const MipsFunctionInfo &FI) const {
if (!EnableMipsTailCalls)
return false;
// Return false if either the callee or caller has a byval argument.
- if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
return false;
// Return true if the callee's argument area is no larger than the
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 00d86834be0a..2121b3bcaa51 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -50,9 +50,9 @@ namespace llvm {
const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
private:
- bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const override;
+ bool isEligibleForTailCallOptimization(
+ const CCState &CCInfo, unsigned NextStackOffset,
+ const MipsFunctionInfo &FI) const override;
void
getOpndList(SmallVectorImpl<SDValue> &Ops,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 5bf875daea99..9a1c40902e8a 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -106,13 +106,14 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
MipsTargetMachine *_TM)
: MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32),
- MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false),
- IsFPXX(false), IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false),
- IsGP64bit(false), HasVFPU(false), HasCnMips(false), IsLinux(true),
- HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
- HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
- InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
- HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+ ABI(MipsABIInfo::Unknown()), IsLittle(little), IsSingleFloat(false),
+ IsFPXX(false), NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
+ IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
+ IsLinux(true), HasMips3_32(false), HasMips3_32r2(false),
+ HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
+ InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
+ InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
+ AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
HasMSA(false), TM(_TM), TargetTriple(TT),
DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))),
TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*this)),
@@ -135,7 +136,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
report_fatal_error("Code generation for MIPS-V is not implemented", false);
// Assert exactly one ABI was chosen.
- assert(MipsABI != UnknownABI);
+ assert(ABI.IsKnown());
assert((((getFeatureBits() & Mips::FeatureO32) != 0) +
((getFeatureBits() & Mips::FeatureEABI) != 0) +
((getFeatureBits() & Mips::FeatureN32) != 0) +
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index f3264621a700..c5b9a3e754d6 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -23,6 +23,7 @@
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "MipsABIInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -36,13 +37,6 @@ class MipsTargetMachine;
class MipsSubtarget : public MipsGenSubtargetInfo {
virtual void anchor();
-public:
- // NOTE: O64 will not be supported.
- enum MipsABIEnum {
- UnknownABI, O32, N32, N64, EABI
- };
-
-protected:
enum MipsArchEnum {
Mips1, Mips2, Mips32, Mips32r2, Mips32r6, Mips3, Mips4, Mips5, Mips64,
Mips64r2, Mips64r6
@@ -51,8 +45,8 @@ protected:
// Mips architecture version
MipsArchEnum MipsArchVersion;
- // Mips supported ABIs
- MipsABIEnum MipsABI;
+ // Selected ABI
+ MipsABIInfo ABI;
// IsLittle - The target is Little Endian
bool IsLittle;
@@ -65,6 +59,9 @@ protected:
// IsFPXX - MIPS O32 modeless ABI.
bool IsFPXX;
+ // NoABICalls - Disable SVR4-style position-independent code.
+ bool NoABICalls;
+
// IsFP64bit - The target processor has 64-bit floating point registers.
bool IsFP64bit;
@@ -157,12 +154,12 @@ public:
CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
/// Only O32 and EABI supported right now.
- bool isABI_EABI() const { return MipsABI == EABI; }
- bool isABI_N64() const { return MipsABI == N64; }
- bool isABI_N32() const { return MipsABI == N32; }
- bool isABI_O32() const { return MipsABI == O32; }
+ bool isABI_EABI() const { return ABI.IsEABI(); }
+ bool isABI_N64() const { return ABI.IsN64(); }
+ bool isABI_N32() const { return ABI.IsN32(); }
+ bool isABI_O32() const { return ABI.IsO32(); }
bool isABI_FPXX() const { return isABI_O32() && IsFPXX; }
- unsigned getTargetABI() const { return MipsABI; }
+ const MipsABIInfo &getABI() const { return ABI; }
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -200,16 +197,16 @@ public:
bool hasCnMips() const { return HasCnMips; }
bool isLittle() const { return IsLittle; }
+ bool isABICalls() const { return !NoABICalls; }
bool isFPXX() const { return IsFPXX; }
bool isFP64bit() const { return IsFP64bit; }
bool useOddSPReg() const { return UseOddSPReg; }
bool noOddSPReg() const { return !UseOddSPReg; }
bool isNaN2008() const { return IsNaN2008bit; }
- bool isNotFP64bit() const { return !IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
bool isGP32bit() const { return !IsGP64bit; }
+ unsigned getGPRSizeInBytes() const { return isGP64bit() ? 8 : 4; }
bool isSingleFloat() const { return IsSingleFloat; }
- bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
bool inMips16ModeDefault() const {
@@ -248,7 +245,6 @@ public:
bool os16() const { return Os16;};
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
- bool isNotTargetNaCl() const { return !TargetTriple.isOSNaCl(); }
// for now constant islands are on for the whole compilation unit but we only
// really use them if in addition we are in mips16 mode
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 7279b091b34c..628819de407e 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
@@ -260,7 +261,7 @@ void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
if (!MI->getOperand(OpNo).isImm())
return printOperand(MI, OpNo, O);
- O << (int)MI->getOperand(OpNo).getImm()*4;
+ O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
}
@@ -308,10 +309,16 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- printBranchOperand(MI, OpNo, O);
+ // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
+ // come at the _end_ of the expression.
+ const MCOperand &Op = MI->getOperand(OpNo);
+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
+ O << refExp.getSymbol().getName();
O << '(';
printOperand(MI, OpNo+1, O);
O << ')';
+ if (refExp.getKind() != MCSymbolRefExpr::VK_None)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index e93e95fc0751..ca813176bd5a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -236,7 +236,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_DTPREL16_HIGHESTA;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
- Type = ELF::R_PPC64_GOT_TLSGD16;
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSGD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSGD16;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
Type = ELF::R_PPC64_GOT_TLSGD16_LO;
@@ -248,7 +251,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_GOT_TLSGD16_HA;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
- Type = ELF::R_PPC64_GOT_TLSLD16;
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSLD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSLD16;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
Type = ELF::R_PPC64_GOT_TLSLD16_LO;
@@ -344,13 +350,22 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_PPC_TLSGD:
- Type = ELF::R_PPC64_TLSGD;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSGD;
+ else
+ Type = ELF::R_PPC_TLSGD;
break;
case MCSymbolRefExpr::VK_PPC_TLSLD:
- Type = ELF::R_PPC64_TLSLD;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSLD;
+ else
+ Type = ELF::R_PPC_TLSLD;
break;
case MCSymbolRefExpr::VK_PPC_TLS:
- Type = ELF::R_PPC64_TLS;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLS;
+ else
+ Type = ELF::R_PPC_TLS;
break;
}
break;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 4c6780ff75a7..1bb5173e1937 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -184,6 +184,23 @@ public:
if ((Flags & ELF::EF_PPC64_ABI) == 0)
MCA.setELFHeaderEFlags(Flags | 2);
}
+ void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override {
+ // When encoding an assignment to set symbol A to symbol B, also copy
+ // the st_other bits encoding the local entry point offset.
+ if (Value->getKind() != MCExpr::SymbolRef)
+ return;
+ const MCSymbol &RhsSym =
+ static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
+ MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
+ MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(SymbolData) << 2;
+ Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+ Other |= (MCELF::getOther(Data) << 2) & ELF::STO_PPC64_LOCAL_MASK;
+ MCELF::setOther(SymbolData, Other >> 2);
+ }
};
class PPCTargetMachOStreamer : public PPCTargetStreamer {
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index ba5fa4f79b4e..38840e624a53 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -96,7 +96,12 @@ namespace llvm {
MO_TOC_LO = 7 << 4,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 4
+ MO_TLS = 8 << 4,
+
+ // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
+ // call sequences.
+ MO_TLSLD = 9 << 4,
+ MO_TLSGD = 10 << 4
};
} // end namespace PPCII
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 6f67c598c754..8357665d30db 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -373,7 +373,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(1);
// Map symbol -> label of TOC entry
- assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
MCSymbol *MOSymbol = nullptr;
if (MO.isGlobal())
MOSymbol = getSymbol(MO.getGlobal());
@@ -381,6 +381,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
@@ -397,6 +399,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::LDtocJTI:
case PPC::LDtocCPT:
+ case PPC::LDtocBA:
case PPC::LDtoc: {
// Transform %X3 = LDtoc <ga:@min1>, %X2
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
@@ -407,7 +410,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(1);
// Map symbol -> label of TOC entry
- assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
MCSymbol *MOSymbol = nullptr;
if (MO.isGlobal())
MOSymbol = getSymbol(MO.getGlobal());
@@ -415,6 +418,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
@@ -436,7 +441,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
const MachineOperand &MO = MI->getOperand(2);
- assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+ MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
@@ -456,9 +462,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
- MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
+ MO.isJTI() || MO.isBlockAddress() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -477,12 +486,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// associated TOC entry. Otherwise reference the symbol directly.
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
- assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+ MO.isBlockAddress()) &&
"Invalid operand for LDtocL!");
MCSymbol *MOSymbol = nullptr;
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isBlockAddress()) {
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
if (TM.getCodeModel() == CodeModel::Large)
@@ -573,6 +587,34 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case PPC::PPC32PICGOT: {
+ MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTRef = OutContext.CreateTempSymbol();
+ MCSymbol *NextInstr = OutContext.CreateTempSymbol();
+
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
+ MCSymbolRefExpr::Create(GOTRef, OutContext),
+ OutContext);
+ OutStreamer.EmitLabel(GOTRef);
+ OutStreamer.EmitValue(OffsExpr, 4);
+ OutStreamer.EmitLabel(NextInstr);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
case PPC::PPC32GOT: {
MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
const MCExpr *SymGotTlsL =
@@ -606,40 +648,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsGD));
return;
}
- case PPC::ADDItlsgdL: {
+ case PPC::ADDItlsgdL:
// Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDItlsgdL32: {
+ // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsgd
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO :
+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymGotTlsGD));
- return;
- }
- case PPC::GETtlsADDR: {
- // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
- // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
- StringRef Name = "__tls_get_addr";
- MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
- const MCSymbolRefExpr *TlsRef =
- MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
- const MachineOperand &MO = MI->getOperand(2);
- const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymVar =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
- OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
- .addExpr(TlsRef)
- .addExpr(SymVar));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
return;
}
case PPC::ADDIStlsldHA: {
@@ -658,72 +685,63 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsLD));
return;
}
- case PPC::ADDItlsldL: {
+ case PPC::ADDItlsldL:
// Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDItlsldL32: {
+ // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsld
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO :
+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymGotTlsLD));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
return;
}
- case PPC::GETtlsldADDR: {
- // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
- // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld)
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
- StringRef Name = "__tls_get_addr";
- MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
- const MCSymbolRefExpr *TlsRef =
- MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
- const MachineOperand &MO = MI->getOperand(2);
- const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymVar =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
- OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
- .addExpr(TlsRef)
- .addExpr(SymVar));
- return;
- }
- case PPC::ADDISdtprelHA: {
+ case PPC::ADDISdtprelHA:
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDISdtprelHA32: {
+ // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
+ // Into: %Rd = ADDIS %R3, sym@dtprel@ha
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X3)
- .addExpr(SymDtprel));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3)
+ .addExpr(SymDtprel));
return;
}
- case PPC::ADDIdtprelL: {
+ case PPC::ADDIdtprelL:
// Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@dtprel@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDIdtprelL32: {
+ // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@dtprel@l
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymDtprel));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
return;
}
case PPC::MFOCRF:
@@ -903,7 +921,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
- MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
+ MCSymbol *S = I->first;
if (isPPC64)
TS.emitTCEntry(*S);
else
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 2e524d604789..9c55a2900389 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -153,7 +153,7 @@ class PPCFastISel final : public FastISel {
unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
- unsigned PPCMaterializeInt(const Constant *C, MVT VT);
+ unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
@@ -865,7 +865,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
}
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
@@ -898,10 +898,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
if (SrcVT == MVT::i32) {
if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
- Addr.Offset = 4;
+ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
} else if (PPCSubTarget->hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
- Addr.Offset = 4;
+ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
}
}
@@ -985,7 +985,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
@@ -1548,13 +1548,23 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
// Special case for returning a constant integer of any size.
// Materialize the constant as an i64 and copy it to the return
- // register. This avoids an unnecessary extend or truncate.
+ // register. We still need to worry about properly extending the sign. E.g:
+ // If the constant has only one bit, it means it is a boolean. Therefore
+ // we can't use PPCMaterializeInt because it extends the sign which will
+ // cause negations of the returned value to be incorrect as they are
+ // implemented as the flip of the least significant bit.
if (isa<ConstantInt>(*RV)) {
const Constant *C = cast<Constant>(RV);
- unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
- unsigned RetReg = ValLocs[0].getLocReg();
+
+ CCValAssign &VA = ValLocs[0];
+
+ unsigned RetReg = VA.getLocReg();
+ unsigned SrcReg = PPCMaterializeInt(C, MVT::i64,
+ VA.getLocInfo() == CCValAssign::SExt);
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+ TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+
RetRegs.push_back(RetReg);
} else {
@@ -2014,7 +2024,8 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
+ bool UseSExt) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
@@ -2038,7 +2049,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
- .addImm(CI->getSExtValue());
+ .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() );
return ImmReg;
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index b2577a9c7cf7..2a02dfca6ae6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -505,7 +505,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
- bool needsFrameMoves = MMI.hasDebugInfo() ||
+ bool needsCFI = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
@@ -726,17 +726,28 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(ScratchReg);
}
- // Add the "machine moves" for the instructions we generated above, but in
- // reverse order.
- if (needsFrameMoves) {
- // Show update of SP.
- assert(NegFrameSize);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ // Add Call Frame Information for the instructions we generated above.
+ if (needsCFI) {
+ unsigned CFIIndex;
+
+ if (HasBP) {
+ // Define CFA in terms of BP. Do this in preference to using FP/SP,
+ // because if the stack needed aligning then CFA won't be at a fixed
+ // offset from FP/SP.
+ unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ } else {
+ // Adjust the definition of CFA to account for the change in SP.
+ assert(NegFrameSize);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ }
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
if (HasFP) {
+ // Describe where FP was saved, at a fixed offset from CFA.
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
@@ -745,6 +756,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
}
if (HasBP) {
+ // Describe where BP was saved, at a fixed offset from CFA.
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
@@ -753,6 +765,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
}
if (MustSaveLR) {
+ // Describe where LR was saved, at a fixed offset from CFA.
unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
@@ -767,8 +780,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(SPReg)
.addReg(SPReg);
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer is ready.
+ if (!HasBP && needsCFI) {
+ // Change the definition of CFA from SP+offset to FP+offset, because SP
+ // will change at every alloca.
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
@@ -778,8 +792,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
- if (needsFrameMoves) {
- // Add callee saved registers to move list.
+ if (needsCFI) {
+ // Describe where callee saved registers were saved, at fixed offsets from
+ // CFA.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 536c2824fb7a..57960598c83f 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -172,10 +172,20 @@ namespace {
/// a register. The case of adding a (possibly relocatable) constant to a
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
- bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps) override {
- OutOps.push_back(Op);
+ // We need to make sure that this one operand does not end up in r0
+ // (because we might end up lowering this as 0(%op)).
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ SDValue NewOp =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ SDLoc(Op), Op.getValueType(),
+ Op, RC), 0);
+
+ OutOps.push_back(NewOp);
return false;
}
@@ -1439,7 +1449,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// For medium and large code model, we generate two instructions as
// described below. Otherwise we allow SelectCodeCommon to handle this,
- // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
CodeModel::Model CModel = TM.getCodeModel();
if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
@@ -1456,7 +1466,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
- if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
+ CModel == CodeModel::Large)
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
@@ -1473,6 +1484,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
+ case PPCISD::PPC32_PICGOT: {
+ // Generate a PIC-safe GOT reference.
+ assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
+ "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+ return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32);
+ }
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
// the first operand is odd or even, positive or negative.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 708d36f6f978..0d37f50cd17b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -781,6 +781,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
+ case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
+ case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
@@ -810,10 +812,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
- case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
- case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
@@ -1631,8 +1631,16 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
+ BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
+ const BlockAddress *BA = BASDN->getBlockAddress();
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual BlockAddress is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
@@ -1641,6 +1649,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
+// Generate a call to __tls_get_addr for the given GOT entry Op.
+std::pair<SDValue,SDValue>
+PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
+ SelectionDAG &DAG) const {
+
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Op;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::C, IntPtrTy,
+ DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
+ std::move(Args), 0);
+
+ return LowerCallTo(CLI);
+}
+
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
@@ -1684,50 +1713,40 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::GeneralDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
- GOTReg, TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLSGD);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
- GOTEntryHi, TGA);
-
- // We need a chain node, and don't have one handy. The underlying
- // call has no side effects, so using the function entry node
- // suffices.
- SDValue Chain = DAG.getEntryNode();
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
- SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
- SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
- PtrVT, ParmReg, TGA);
- // The return value from GET_TLS_ADDR really is in X3 already, but
- // some hacks are needed here to tie everything together. The extra
- // copies dissolve during subsequent transforms.
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
- return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ GOTPtr, TGA);
+ std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+ return CallResult.first;
}
if (Model == TLSModel::LocalDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
- GOTReg, TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLSLD);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
- GOTEntryHi, TGA);
-
- // We need a chain node, and don't have one handy. The underlying
- // call has no side effects, so using the function entry node
- // suffices.
- SDValue Chain = DAG.getEntryNode();
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
- SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
- SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
- PtrVT, ParmReg, TGA);
- // The return value from GET_TLSLD_ADDR really is in X3 already, but
- // some hacks are needed here to tie everything together. The extra
- // copies dissolve during subsequent transforms.
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ GOTPtr, TGA);
+ std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+ SDValue TLSAddr = CallResult.first;
+ SDValue Chain = CallResult.second;
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
- Chain, ParmReg, TGA);
+ Chain, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
@@ -2676,7 +2695,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
int FI;
if (HasParameterArea ||
ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
- FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+ FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
else
FI = MFI->CreateStackObject(ArgSize, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3042,7 +3061,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -3690,6 +3709,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
+
+ // If this is a call to __tls_get_addr, find the symbol whose address
+ // is to be taken and add it to the list. This will be used to
+ // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
+ // We find the symbol by walking the chain to the CopyFromReg, walking
+ // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
+ // pulling the symbol from that node.
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
+ assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
+ SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
+ SDValue TGTAddr = AddI->getOperand(1);
+ assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
+ "Didn't find target global TLS address where we expected one");
+ Ops.push_back(TGTAddr);
+ CallOpc = PPCISD::CALL_TLS;
+ }
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
@@ -3841,7 +3877,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
- }
+ } else if (CallOpc == PPCISD::CALL_TLS)
+ // For 64-bit SVR4, TLS calls are always non-local.
+ CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
@@ -8936,6 +8974,12 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
&PPC::G8RCRegClass);
}
+ // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
+ if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
+ R.first = PPC::CR0;
+ R.second = &PPC::CRRCRegClass;
+ }
+
return R;
}
@@ -8964,37 +9008,42 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'P': {
ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
if (!CST) return; // Must be an immediate to match.
- unsigned Value = CST->getZExtValue();
+ int64_t Value = CST->getSExtValue();
+ EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
+ // numbers are printed as such.
switch (Letter) {
default: llvm_unreachable("Unknown constraint letter!");
case 'I': // "I" is a signed 16-bit constant.
- if ((short)Value == (int)Value)
- Result = DAG.getTargetConstant(Value, Op.getValueType());
+ if (isInt<16>(Value))
+ Result = DAG.getTargetConstant(Value, TCVT);
break;
case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ if (isShiftedUInt<16, 16>(Value))
+ Result = DAG.getTargetConstant(Value, TCVT);
+ break;
case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
- if ((short)Value == 0)
- Result = DAG.getTargetConstant(Value, Op.getValueType());
+ if (isShiftedInt<16, 16>(Value))
+ Result = DAG.getTargetConstant(Value, TCVT);
break;
case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
- if ((Value >> 16) == 0)
- Result = DAG.getTargetConstant(Value, Op.getValueType());
+ if (isUInt<16>(Value))
+ Result = DAG.getTargetConstant(Value, TCVT);
break;
case 'M': // "M" is a constant that is greater than 31.
if (Value > 31)
- Result = DAG.getTargetConstant(Value, Op.getValueType());
+ Result = DAG.getTargetConstant(Value, TCVT);
break;
case 'N': // "N" is a positive constant that is an exact power of two.
- if ((int)Value > 0 && isPowerOf2_32(Value))
- Result = DAG.getTargetConstant(Value, Op.getValueType());
+ if (Value > 0 && isPowerOf2_64(Value))
+ Result = DAG.getTargetConstant(Value, TCVT);
break;
case 'O': // "O" is the constant zero.
if (Value == 0)
- Result = DAG.getTargetConstant(Value, Op.getValueType());
+ Result = DAG.getTargetConstant(Value, TCVT);
break;
case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
- if ((short)-Value == (int)-Value)
- Result = DAG.getTargetConstant(Value, Op.getValueType());
+ if (isInt<16>(-Value))
+ Result = DAG.getTargetConstant(Value, TCVT);
break;
}
break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index c9394dd12e7b..7179adc9704f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -99,6 +99,10 @@ namespace llvm {
/// SVR4 calls.
CALL, CALL_NOP,
+ /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
+ /// to access TLS variables.
+ CALL_TLS, CALL_NOP_TLS,
+
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
@@ -181,6 +185,10 @@ namespace llvm {
/// on PPC32.
PPC32_GOT,
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+ /// local dynamic TLS on PPC32.
+ PPC32_PICGOT,
+
/// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
/// TLS model, produces an ADDIS8 instruction that adds the GOT
/// base to sym\@got\@tprel\@ha.
@@ -210,10 +218,6 @@ namespace llvm {
/// sym\@got\@tlsgd\@l.
ADDI_TLSGD_L,
- /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsgd).
- GET_TLS_ADDR,
-
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
@@ -224,10 +228,6 @@ namespace llvm {
/// sym\@got\@tlsld\@l.
ADDI_TLSLD_L,
- /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsld).
- GET_TLSLD_ADDR,
-
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
/// local-dynamic TLS model, produces an ADDIS8 instruction
/// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
@@ -548,6 +548,8 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
+ SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 9ed384f56244..b85d9c0ab62c 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -188,6 +188,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
+def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
+ (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
+
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@@ -786,7 +789,7 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
-// The following three definitions are selected for small code model only.
+// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
@@ -801,6 +804,10 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtocCPT",
+ [(set i64:$rD,
+ (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
@@ -872,11 +879,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsADDR",
- [(set i64:$rD,
- (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
@@ -887,11 +889,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsldADDR",
- [(set i64:$rD,
- (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 42b740f4fa46..197e751464ad 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -110,10 +110,8 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
-def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
-def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
[SDNPHasChain]>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
@@ -136,9 +134,15 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -588,6 +592,12 @@ def tlsreg32 : Operand<i32> {
let EncoderMethod = "getTLSRegEncoding";
let ParserMatchClass = PPCTLSRegOperand;
}
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
// PowerPC Predicate operand.
def pred : Operand<OtherVT> {
@@ -1071,6 +1081,8 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
"bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
let isCodeGenOnly = 1 in {
+ def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func),
+ "bl $func", IIC_BrB, []>;
def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
@@ -2358,6 +2370,8 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
+def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
+ (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -2396,13 +2410,37 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
+// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
+// This uses two output registers, the first as the real output, the second as a
+// temporary register, used internally in code generation.
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+ []>, NoEncode<"$rT">;
+
def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
- "#LDgotTprelL32",
- [(set i32:$rD,
- (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+ "#LDgotTprelL32",
+ [(set i32:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
(ADD4TLS $in, tglobaltlsaddr:$g)>;
+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsgdL32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsldL32",
+ [(set i32:$rD,
+ (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDIdtprelL32",
+ [(set i32:$rD,
+ (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDISdtprelHA32",
+ [(set i32:$rD,
+ (PPCaddisDtprelHA i32:$reg,
+ tglobaltlsaddr:$disp))]>;
+
// Support for Position-independent code
def LWZtoc: Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
"#LWZtoc",
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 668041371780..681eda8b9ea4 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -137,6 +137,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
+ case PPCII::MO_TLSGD:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
+ break;
+ case PPCII::MO_TLSLD:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
+ break;
}
if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index b3d145b2cc49..0442a941f45a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -188,13 +188,6 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
}
-// The full condition-code register. This is not modeled fully, but defined
-// here primarily, for compatibility with gcc, to allow the inline asm "cc"
-// clobber specification to work.
-def CC : PPCReg<"cc">, DwarfRegAlias<CR0> {
- let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7];
-}
-
// Link register
def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
//let Aliases = [LR] in
@@ -308,7 +301,3 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
let CopyCost = -1;
}
-def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> {
- let isAllocatable = 0;
-}
-
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 2d494b4aeeb6..ae3eea4943d7 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2829,6 +2829,9 @@ bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
// VExt has not been implemented, so this should be impossible to reach
// for now. However, fallback to Selection DAG isel once implemented.
return false;
+ case CCValAssign::AExtUpper:
+ case CCValAssign::SExtUpper:
+ case CCValAssign::ZExtUpper:
case CCValAssign::FPExt:
llvm_unreachable("Unexpected loc info!");
case CCValAssign::Indirect:
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index e83a5c421b47..9cf0ca0912f9 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1623,15 +1623,30 @@ LinearFunctionTestReplace(Loop *L,
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
if (L->getExitingBlock() == L->getLoopLatch()) {
- // Add one to the "backedge-taken" count to get the trip count.
- // This addition may overflow, which is valid as long as the comparison is
- // truncated to BackedgeTakenCount->getType().
- IVCount = SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ llvm::Value *IncrementedIndvar =
+ IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ const auto *IncrementedIndvarSCEV =
+ cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar));
+ // It is unsafe to use the incremented indvar if it has a wrapping flag, we
+ // don't want to compare against a poison value. Check the SCEV that
+ // corresponds to the incremented indvar, the SCEVExpander will only insert
+ // flags in the IR if the SCEV originally had wrapping flags.
+ // FIXME: In theory, SCEV could drop flags even though they exist in IR.
+ // A more robust solution would involve getting a new expression for
+ // CmpIndVar by applying non-NSW/NUW AddExprs.
+ if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(),
+ SCEV::FlagNUW | SCEV::FlagNSW)) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // This addition may overflow, which is valid as long as the comparison is
+ // truncated to BackedgeTakenCount->getType().
+ IVCount =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
+ CmpIndVar = IncrementedIndvar;
+ }
}
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index a7e80240d9e7..c2467fecb5eb 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -27,18 +27,18 @@
//
// header:
// br %cond, label %if.then, label %if.else
-// / \
-// / \
-// / \
+// + +
+// + +
+// + +
// if.then: if.else:
// %lt = load %addr_l %le = load %addr_l
// <use %lt> <use %le>
// <...> <...>
// store %st, %addr_s store %se, %addr_s
// br label %if.end br label %if.end
-// \ /
-// \ /
-// \ /
+// + +
+// + +
+// + +
// if.end ("footer"):
// <...>
//
@@ -47,16 +47,16 @@
// header:
// %l = load %addr_l
// br %cond, label %if.then, label %if.else
-// / \
-// / \
-// / \
+// + +
+// + +
+// + +
// if.then: if.else:
// <use %l> <use %l>
// <...> <...>
// br label %if.end br label %if.end
-// \ /
-// \ /
-// \ /
+// + +
+// + +
+// + +
// if.end ("footer"):
// %s.sink = phi [%st, if.then], [%se, if.else]
// <...>
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 8c7f253290ba..f902eb23cbcf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2697,7 +2697,10 @@ private:
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
IRBuilderTy PtrBuilder(IRB);
- PtrBuilder.SetInsertPoint(OldPtr);
+ if (isa<PHINode>(OldPtr))
+ PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
+ else
+ PtrBuilder.SetInsertPoint(OldPtr);
PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 531d349d845d..79fcb09f8913 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3357,7 +3357,7 @@ void InnerLoopVectorizer::updateAnalysis() {
DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]);
DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
- DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
+ DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]);
DEBUG(DT->verifyDomTree());
}
@@ -3466,6 +3466,15 @@ bool LoopVectorizationLegality::canVectorize() {
return false;
}
+ // We only handle bottom-tested loops, i.e. loop in which the condition is
+ // checked at the end of each iteration. With that we can assume that all
+ // instructions in the loop are executed the same number of times.
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
+ return false;
+ }
+
// We need to have a loop header.
DEBUG(dbgs() << "LV: Found a loop: " <<
TheLoop->getHeader()->getName() << '\n');
@@ -5192,7 +5201,13 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
return IK_NoInduction;
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
- uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
+ Type *PointerElementType = PhiTy->getPointerElementType();
+ // The pointer stride cannot be determined if the pointer element type is not
+ // sized.
+ if (!PointerElementType->isSized())
+ return IK_NoInduction;
+
+ uint64_t Size = DL->getTypeAllocSize(PointerElementType);
if (C->getValue()->equalsInt(Size))
return IK_PtrInduction;
else if (C->getValue()->equalsInt(0 - Size))
diff --git a/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll b/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
new file mode 100644
index 000000000000..f1ddd13ef0d6
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; PR21622: Check for a crasher when the sum of exits to the same successor of a
+; loop overflows.
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'extremely_likely_loop_successor':
+; CHECK-NEXT: block-frequency-info: extremely_likely_loop_successor
+define void @extremely_likely_loop_successor() {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+ br label %loop
+
+; CHECK-NEXT: loop: float = 1.0,
+loop:
+ %exit.1.cond = call i1 @foo()
+ br i1 %exit.1.cond, label %exit, label %loop.2, !prof !0
+
+; CHECK-NEXT: loop.2: float = 0.0000000
+loop.2:
+ %exit.2.cond = call i1 @foo()
+ br i1 %exit.2.cond, label %exit, label %loop.3, !prof !0
+
+; CHECK-NEXT: loop.3: float = 0.0000000
+loop.3:
+ %exit.3.cond = call i1 @foo()
+ br i1 %exit.3.cond, label %exit, label %loop.4, !prof !0
+
+; CHECK-NEXT: loop.4: float = 0.0,
+loop.4:
+ %exit.4.cond = call i1 @foo()
+ br i1 %exit.4.cond, label %exit, label %loop, !prof !0
+
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+exit:
+ ret void
+}
+
+declare i1 @foo()
+
+!0 = metadata !{metadata !"branch_weights", i32 4294967295, i32 1}
diff --git a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
new file mode 100644
index 000000000000..6d2564ba1ab6
--- /dev/null
+++ b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=thumbv7-eabi -o - %s | FileCheck %s
+
+declare cc 10 void @g()
+
+define cc 10 void @test_direct_tail() {
+; CHECK-LABEL: test_direct_tail:
+; CHECK: b g
+
+ tail call cc10 void @g()
+ ret void
+}
+
+@ind_func = global void()* zeroinitializer
+
+define cc 10 void @test_indirect_tail() {
+; CHECK-LABEL: test_indirect_tail:
+; CHECK: bx {{r[0-9]+}}
+ %func = load void()** @ind_func
+ tail call cc10 void()* %func()
+ ret void
+}
diff --git a/test/CodeGen/Mips/abicalls.ll b/test/CodeGen/Mips/abicalls.ll
index 6fa33aa158ad..7edc3e25c352 100644
--- a/test/CodeGen/Mips/abicalls.ll
+++ b/test/CodeGen/Mips/abicalls.ll
@@ -1,16 +1,11 @@
-;
-; When the assembler is ready a .s file for it will
-; be created.
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=STATIC %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s
+; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=ABICALLS -check-prefix=PIC %s
-; Note that EF_MIPS_CPIC is set by -mabicalls which is the default on Linux
-; TODO need to support -mno-abicalls
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr noabicalls -relocation-model=static %s -o - | FileCheck -implicit-check-not='.abicalls' -implicit-check-not='pic0' %s
-; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-STATIC %s
-; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck -check-prefix=CHECK-PIC %s
-; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips4 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
-; RUN: llc -filetype=asm -mtriple mips64el-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | FileCheck -check-prefix=CHECK-PIC %s
+; ABICALLS: .abicalls
-; CHECK-STATIC: .abicalls
-; CHECK-STATIC-NEXT: pic0
-; CHECK-PIC: .abicalls
-; CHECK-PIC-NOT: pic0
+; STATIC: pic0
+; PIC-NOT: pic0
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index f4118ecec79d..78fd8296178e 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -12,7 +12,7 @@
@x = common global i32 0, align 4
-define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
+define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind {
entry:
%0 = atomicrmw add i32* @x, i32 %incr monotonic
ret i32 %0
@@ -29,7 +29,7 @@ entry:
; ALL: beqz $[[R2]], $[[BB0]]
}
-define i32 @AtomicLoadNand32(i32 %incr) nounwind {
+define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
entry:
%0 = atomicrmw nand i32* @x, i32 %incr monotonic
ret i32 %0
@@ -47,7 +47,7 @@ entry:
; ALL: beqz $[[R2]], $[[BB0]]
}
-define i32 @AtomicSwap32(i32 %newval) nounwind {
+define i32 @AtomicSwap32(i32 signext %newval) nounwind {
entry:
%newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
@@ -66,7 +66,7 @@ entry:
; ALL: beqz $[[R2]], $[[BB0]]
}
-define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
+define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
entry:
%newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
@@ -293,7 +293,7 @@ entry:
; HAS-SEB-SEH: seb $2, $[[R17]]
}
-define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 %oldval, i8 signext %newval) nounwind {
+define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) nounwind {
entry:
%0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic
%1 = extractvalue { i8, i1 } %0, 1
@@ -381,7 +381,7 @@ entry:
@countsint = common global i32 0, align 4
-define i32 @CheckSync(i32 %v) nounwind noinline {
+define i32 @CheckSync(i32 signext %v) nounwind noinline {
entry:
%0 = atomicrmw add i32* @countsint, i32 %v seq_cst
ret i32 %0
@@ -415,7 +415,7 @@ entry:
; Check that MIPS32R6 has the correct offset range.
; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store.
-define i32 @AtomicLoadAdd32_OffGt9Bit(i32 %incr) nounwind {
+define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
entry:
%0 = atomicrmw add i32* getelementptr(i32* @x, i32 256), i32 %incr monotonic
ret i32 %0
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
index 812eef137773..f182e65b0266 100644
--- a/test/CodeGen/Mips/bswap.ll
+++ b/test/CodeGen/Mips/bswap.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 | FileCheck %s -check-prefix=MIPS16
-define i32 @bswap32(i32 %x) nounwind readnone {
+define i32 @bswap32(i32 signext %x) nounwind readnone {
entry:
; MIPS32-LABEL: bswap32:
; MIPS32: wsbh $[[R0:[0-9]+]]
@@ -29,7 +29,7 @@ entry:
ret i32 %or.3
}
-define i64 @bswap64(i64 %x) nounwind readnone {
+define i64 @bswap64(i64 signext %x) nounwind readnone {
entry:
; MIPS32-LABEL: bswap64:
; MIPS32: wsbh $[[R0:[0-9]+]]
@@ -72,24 +72,24 @@ entry:
define <4 x i32> @bswapv4i32(<4 x i32> %x) nounwind readnone {
entry:
; MIPS32-LABEL: bswapv4i32:
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
; MIPS64-LABEL: bswapv4i32:
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
; Don't bother with a MIPS16 version. It's just bswap32 repeated four times and
; would be very long
diff --git a/test/CodeGen/Mips/cconv/arguments-float.ll b/test/CodeGen/Mips/cconv/arguments-float.ll
index e2119ec08028..14a3baa7f539 100644
--- a/test/CodeGen/Mips/cconv/arguments-float.ll
+++ b/test/CodeGen/Mips/cconv/arguments-float.ll
@@ -69,26 +69,26 @@ entry:
; O32-DAG: sw [[R4]], 28([[R2]])
; NEW-DAG: sd $6, 24([[R2]])
-; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp)
-; O32-DAG: lw [[R4:\$[0-9]+]], 36($sp)
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp)
; O32-DAG: sw [[R3]], 32([[R2]])
; O32-DAG: sw [[R4]], 36([[R2]])
; NEW-DAG: sd $7, 32([[R2]])
-; O32-DAG: lw [[R3:\$[0-9]+]], 40($sp)
-; O32-DAG: lw [[R4:\$[0-9]+]], 44($sp)
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp)
; O32-DAG: sw [[R3]], 40([[R2]])
; O32-DAG: sw [[R4]], 44([[R2]])
; NEW-DAG: sd $8, 40([[R2]])
-; O32-DAG: lw [[R3:\$[0-9]+]], 48($sp)
-; O32-DAG: lw [[R4:\$[0-9]+]], 52($sp)
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp)
; O32-DAG: sw [[R3]], 48([[R2]])
; O32-DAG: sw [[R4]], 52([[R2]])
; NEW-DAG: sd $9, 48([[R2]])
-; O32-DAG: lw [[R3:\$[0-9]+]], 56($sp)
-; O32-DAG: lw [[R4:\$[0-9]+]], 60($sp)
+; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 56($sp)
+; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 60($sp)
; O32-DAG: sw [[R3]], 56([[R2]])
; O32-DAG: sw [[R4]], 60([[R2]])
; NEW-DAG: sd $10, 56([[R2]])
@@ -135,8 +135,8 @@ entry:
; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(floats)(
; The first four arguments are the same in O32/N32/N64.
-; The first argument isn't floating point so floating point registers are not
-; used.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
; aligned and occupying one slot. We'll use GCC's definition.
@@ -195,7 +195,7 @@ entry:
; O32-DAG: sw $7, 12([[R2]])
; NEW-DAG: sd $5, 8([[R2]])
-define void @float_arg2(i8 %a, float %b) nounwind {
+define void @float_arg2(i8 signext %a, float %b) nounwind {
entry:
%0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
index aadf7d18c17d..70ccf14c5450 100644
--- a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
@@ -4,11 +4,11 @@
; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWBE %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWLE %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWBE %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWLE %s
; Test the effect of varargs on floating point types in the non-variable part
; of the argument list as specified by section 2 of the MIPSpro N32 Handbook.
@@ -34,6 +34,7 @@ entry:
%b = va_arg i8** %ap, double
%1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
store volatile double %b, double* %1
+ call void @llvm.va_end(i8* %ap2)
ret void
}
@@ -98,6 +99,7 @@ entry:
%b = va_arg i8** %ap, float
%1 = getelementptr [11 x float]* @floats, i32 0, i32 2
store volatile float %b, float* %1
+ call void @llvm.va_end(i8* %ap2)
ret void
}
@@ -140,16 +142,18 @@ entry:
; Increment the pointer then get the varargs arg
; LLVM will rebind the load to the stack pointer instead of the varargs pointer
; during lowering. This is fine and doesn't change the behaviour.
-; N32/N64 is using ori instead of addiu/daddiu but (although odd) this is fine
-; since the stack is always aligned.
+; Also, in big-endian mode the offset must be increased by 4 to retrieve the
+; correct half of the argument slot.
+;
; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 4
; O32-DAG: sw [[VAPTR]], 4($sp)
-; N32-DAG: ori [[VAPTR]], [[VAPTR]], 4
+; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
; N32-DAG: sw [[VAPTR]], 4($sp)
-; N64-DAG: ori [[VAPTR]], [[VAPTR]], 4
+; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8
; N64-DAG: sd [[VAPTR]], 0($sp)
; O32-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
-; NEW-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; NEWLE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
+; NEWBE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
; ALL-DAG: swc1 [[FTMP1]], 8([[R2]])
declare void @llvm.va_start(i8*)
diff --git a/test/CodeGen/Mips/cconv/arguments-struct.ll b/test/CodeGen/Mips/cconv/arguments-struct.ll
new file mode 100644
index 000000000000..c1bc84ee7a04
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-struct.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=mips-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
+; RUN-TODO: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-LE %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-LE %s
+
+; Test small structures for all ABI's and byte orders.
+;
+; N32/N64 are identical in this area so their checks have been combined into
+; the 'NEW' prefix (the N stands for New).
+
+@bytes = global [2 x i8] zeroinitializer
+
+define void @s_i8(i8 inreg %a) nounwind {
+entry:
+ store i8 %a, i8* getelementptr inbounds ([2 x i8]* @bytes, i32 0, i32 1)
+ ret void
+}
+
+; ALL-LABEL: s_i8:
+
+; SYM32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(bytes)
+; SYM32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(bytes)
+
+; SYM64-DAG: ld [[PTR:\$[0-9]+]], %got_disp(bytes)(
+
+; O32-BE-DAG: srl [[ARG:\$[0-9]+]], $4, 24
+; O32-BE-DAG: sb [[ARG]], 1([[PTR]])
+
+; O32-LE-DAG: sb $4, 1([[PTR]])
+
+; NEW-BE-DAG: dsrl [[ARG:\$[0-9]+]], $4, 56
+; NEW-BE-DAG: sb [[ARG]], 1([[PTR]])
+
+; NEW-LE-DAG: sb $4, 1([[PTR]])
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs.ll b/test/CodeGen/Mips/cconv/arguments-varargs.ll
new file mode 100644
index 000000000000..adacda5bc420
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/arguments-varargs.ll
@@ -0,0 +1,1104 @@
+; RUN: llc -mtriple=mips-linux -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-linux -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-linux -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-LE %s
+
+; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-BE %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-LE %s
+
+@hwords = global [3 x i16] zeroinitializer, align 1
+@words = global [3 x i32] zeroinitializer, align 1
+@dwords = global [3 x i64] zeroinitializer, align 1
+
+define void @fn_i16_dotdotdot_i16(i16 %a, ...) {
+entry:
+; ALL-LABEL: fn_i16_dotdotdot_i16:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
+
+; ALL-DAG: sh [[ARG1]], 2([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sh [[ARG2]], 4([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i16
+ %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ store volatile i16 %arg1, i16* %e1, align 2
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i16
+ %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ store volatile i16 %arg2, i16* %e2, align 2
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i16_dotdotdot_i32(i16 %a, ...) {
+entry:
+; ALL-LABEL: fn_i16_dotdotdot_i32:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
+
+; ALL-DAG: sw [[ARG1]], 4([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sw [[ARG2]], 8([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i32
+ %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ store volatile i32 %arg1, i32* %e1, align 4
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i32
+ %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ store volatile i32 %arg2, i32* %e2, align 4
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i16_dotdotdot_i64(i16 %a, ...) {
+entry:
+; ALL-LABEL: fn_i16_dotdotdot_i64:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]] (and realign pointer for O32)
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion and copy it to the global.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 8([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 12([[GV]])
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
+; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-DAG: sd [[ARG1]], 8([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion and copy it to the global.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 16([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 20([[GV]])
+
+; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-DAG: sd [[ARG2]], 16([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i64
+ %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ store volatile i64 %arg1, i64* %e1, align 8
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i64
+ %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ store volatile i64 %arg2, i64* %e2, align 8
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i32_dotdotdot_i16(i32 %a, ...) {
+entry:
+; ALL-LABEL: fn_i32_dotdotdot_i16:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
+
+; ALL-DAG: sh [[ARG1]], 2([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sh [[ARG2]], 4([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i16
+ %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ store volatile i16 %arg1, i16* %e1, align 2
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i16
+ %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ store volatile i16 %arg2, i16* %e2, align 2
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i32_dotdotdot_i32(i32 %a, ...) {
+entry:
+; ALL-LABEL: fn_i32_dotdotdot_i32:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
+
+; ALL-DAG: sw [[ARG1]], 4([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sw [[ARG2]], 8([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i32
+ %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ store volatile i32 %arg1, i32* %e1, align 4
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i32
+ %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ store volatile i32 %arg2, i32* %e2, align 4
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i32_dotdotdot_i64(i32 %a, ...) {
+entry:
+; ALL-LABEL: fn_i32_dotdotdot_i64:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+; O32-DAG: sw $5, 12([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 12 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the 4 byte slot for the first
+; fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 12
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]] (and realign pointer for O32)
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion and copy it to the global.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 8([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 12([[GV]])
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
+; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-DAG: sd [[ARG1]], 8([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion and copy it to the global.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 16([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 20([[GV]])
+
+; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-DAG: sd [[ARG2]], 16([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i64
+ %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ store volatile i64 %arg1, i64* %e1, align 8
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i64
+ %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ store volatile i64 %arg2, i64* %e2, align 8
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i64_dotdotdot_i16(i64 %a, ...) {
+entry:
+; ALL-LABEL: fn_i64_dotdotdot_i16:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 16 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
+; first fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(hwords)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(hwords)(
+
+; ALL-DAG: sh [[ARG1]], 2([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sh [[ARG2]], 4([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i16
+ %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ store volatile i16 %arg1, i16* %e1, align 2
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i16
+ %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ store volatile i16 %arg2, i16* %e2, align 2
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i64_dotdotdot_i32(i64 %a, ...) {
+entry:
+; ALL-LABEL: fn_i64_dotdotdot_i32:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 16 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
+; first fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]]
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-BE-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA]])
+
+; Copy the arg to the global
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(words)
+
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(words)(
+
+; ALL-DAG: sw [[ARG1]], 4([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+
+; NEW-LE-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-BE-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA2]])
+
+; Copy the arg to the global
+; ALL-DAG: sw [[ARG2]], 8([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i32
+ %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ store volatile i32 %arg1, i32* %e1, align 4
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i32
+ %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ store volatile i32 %arg2, i32* %e2, align 4
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+define void @fn_i64_dotdotdot_i64(i64 %a, ...) {
+entry:
+; ALL-LABEL: fn_i64_dotdotdot_i64:
+
+; Set up the stack with an 8-byte local area. N32/N64 must also make room for
+; the argument save area (56 bytes).
+; O32: addiu [[SP:\$sp]], $sp, -8
+; N32: addiu [[SP:\$sp]], $sp, -64
+; N64: daddiu [[SP:\$sp]], $sp, -64
+
+; Save variable argument portion on the stack
+; O32-DAG: sw $7, 20([[SP]])
+; O32-DAG: sw $6, 16([[SP]])
+
+; NEW-DAG: sd $11, 56([[SP]])
+; NEW-DAG: sd $10, 48([[SP]])
+; NEW-DAG: sd $9, 40([[SP]])
+; NEW-DAG: sd $8, 32([[SP]])
+; NEW-DAG: sd $7, 24([[SP]])
+; NEW-DAG: sd $6, 16([[SP]])
+; NEW-DAG: sd $5, 8([[SP]])
+
+; Initialize variable argument pointer.
+; For O32, the offset is 16 due to the 4 bytes used to store local variables,
+; 4 bytes padding to maintain stack alignment, and the two 4 byte slots for the
+; first fixed argument.
+; For N32/N64, it is only 8 since the fixed arguments do not reserve stack
+; space.
+; O32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 16
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; N32-DAG: addiu [[VA:\$[0-9]+]], [[SP]], 8
+; N32-DAG: sw [[VA]], 0([[SP]])
+
+; N64-DAG: daddiu [[VA:\$[0-9]+]], [[SP]], 8
+; N64-DAG: sd [[VA]], 0([[SP]])
+
+; Store [[VA]]
+; O32-DAG: sw [[VA]], 0([[SP]])
+
+; ALL: # ANCHOR1
+
+; Increment [[VA]] (and realign pointer for O32)
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: addiu [[VA_TMP1:\$[0-9]+]], $zero, -8
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N32-DAG: sw [[VA2]], 0([[SP]])
+
+; N64-DAG: ld [[VA:\$[0-9]+]], 0([[SP]])
+; N64-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 8
+; N64-DAG: sd [[VA2]], 0([[SP]])
+
+; Load the first argument from the variable portion and copy it to the global.
+; This has used the stack pointer directly rather than the [[VA]] we just set
+; up.
+; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte
+; order.
+; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 8([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG1]], 12([[GV]])
+
+; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
+; N64-DAG: ld [[GV:\$[0-9]+]], %got_disp(dwords)(
+; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
+; NEW-DAG: sd [[ARG1]], 8([[GV]])
+
+; ALL: # ANCHOR2
+
+; Increment [[VA]] again.
+; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
+; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA_TMP0:\$[0-9]+]], [[VA]], 7
+; O32-DAG: and [[VA_TMP2:\$[0-9]+]], [[VA_TMP0]], [[VA_TMP1]]
+; O32-DAG: ori [[VA2:\$[0-9]+]], [[VA_TMP2]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+
+; N32-DAG: lw [[VA2:\$[0-9]+]], 0([[SP]])
+; N32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N32-DAG: sw [[VA3]], 0([[SP]])
+
+; N64-DAG: ld [[VA2:\$[0-9]+]], 0([[SP]])
+; N64-DAG: daddiu [[VA3:\$[0-9]+]], [[VA2]], 8
+; N64-DAG: sd [[VA3]], 0([[SP]])
+
+; Load the second argument from the variable portion and copy it to the global.
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 16([[GV]])
+; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
+; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4
+; O32-DAG: sw [[VA2]], 0([[SP]])
+; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]])
+; O32-DAG: sw [[ARG2]], 20([[GV]])
+
+; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]])
+; NEW-DAG: sd [[ARG2]], 16([[GV]])
+
+ %ap = alloca i8*, align 8
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ call void asm sideeffect "# ANCHOR1", ""()
+ %arg1 = va_arg i8** %ap, i64
+ %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ store volatile i64 %arg1, i64* %e1, align 8
+
+ call void asm sideeffect "# ANCHOR2", ""()
+ %arg2 = va_arg i8** %ap, i64
+ %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ store volatile i64 %arg2, i64* %e2, align 8
+
+ call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
diff --git a/test/CodeGen/Mips/cconv/arguments.ll b/test/CodeGen/Mips/cconv/arguments.ll
index 8fe29f3c8ce7..43da6044408b 100644
--- a/test/CodeGen/Mips/cconv/arguments.ll
+++ b/test/CodeGen/Mips/cconv/arguments.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
-; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
@@ -23,8 +23,10 @@
@floats = global [11 x float] zeroinitializer
@doubles = global [11 x double] zeroinitializer
-define void @align_to_arg_slots(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g,
- i8 %h, i8 %i, i8 %j) nounwind {
+define void @align_to_arg_slots(i8 signext %a, i8 signext %b, i8 signext %c,
+ i8 signext %d, i8 signext %e, i8 signext %f,
+ i8 signext %g, i8 signext %h, i8 signext %i,
+ i8 signext %j) nounwind {
entry:
%0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
@@ -53,7 +55,7 @@ entry:
; We won't test the way the global address is calculated in this test. This is
; just to get the register number for the other checks.
; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
-; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM64-DAG: ld [[R1:\$[0-9]+]], %got_disp(bytes)(
; The first four arguments are the same in O32/N32/N64
; ALL-DAG: sb $4, 1([[R1]])
@@ -82,15 +84,16 @@ entry:
; increase by 4 for O32 and 8 for N32/N64.
; O32-DAG: lw [[R3:\$[0-9]+]], 32($sp)
; O32-DAG: sb [[R3]], 9([[R1]])
-; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp)
; NEW-DAG: sb [[R3]], 9([[R1]])
; O32-DAG: lw [[R3:\$[0-9]+]], 36($sp)
; O32-DAG: sb [[R3]], 10([[R1]])
-; NEW-DAG: lw [[R3:\$[0-9]+]], 8($sp)
+; NEW-DAG: ld [[R3:\$[0-9]+]], 8($sp)
; NEW-DAG: sb [[R3]], 10([[R1]])
-define void @slot_skipping(i8 %a, i64 %b, i8 %c, i8 %d,
- i8 %e, i8 %f, i8 %g, i64 %i, i8 %j) nounwind {
+define void @slot_skipping(i8 signext %a, i64 signext %b, i8 signext %c,
+ i8 signext %d, i8 signext %e, i8 signext %f,
+ i8 signext %g, i64 signext %i, i8 signext %j) nounwind {
entry:
%0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
@@ -117,9 +120,9 @@ entry:
; We won't test the way the global address is calculated in this test. This is
; just to get the register number for the other checks.
; SYM32-DAG: addiu [[R1:\$[0-9]+]], ${{[0-9]+}}, %lo(bytes)
-; SYM64-DAG: ld [[R1:\$[0-9]]], %got_disp(bytes)(
+; SYM64-DAG: ld [[R1:\$[0-9]+]], %got_disp(bytes)(
; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords)
-; SYM64-DAG: ld [[R2:\$[0-9]]], %got_disp(dwords)(
+; SYM64-DAG: ld [[R2:\$[0-9]+]], %got_disp(dwords)(
; The first argument is the same in O32/N32/N64.
; ALL-DAG: sb $4, 1([[R1]])
@@ -137,8 +140,7 @@ entry:
; It's not clear why O32 uses lbu for this argument, but it's not wrong so we'll
; accept it for now. The only IR difference is that this argument has
; anyext from i8 and align 8 on it.
-; O32LE-DAG: lbu [[R3:\$[0-9]+]], 16($sp)
-; O32BE-DAG: lbu [[R3:\$[0-9]+]], 19($sp)
+; O32-DAG: lw [[R3:\$[0-9]+]], 16($sp)
; O32-DAG: sb [[R3]], 2([[R1]])
; NEW-DAG: sb $6, 2([[R1]])
; O32-DAG: lw [[R3:\$[0-9]+]], 20($sp)
@@ -166,5 +168,5 @@ entry:
; increase by 4 for O32 and 8 for N32/N64.
; O32-DAG: lw [[R3:\$[0-9]+]], 48($sp)
; O32-DAG: sb [[R3]], 7([[R1]])
-; NEW-DAG: lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp)
; NEW-DAG: sb [[R3]], 7([[R1]])
diff --git a/test/CodeGen/Mips/cconv/return-float.ll b/test/CodeGen/Mips/cconv/return-float.ll
index 28cf83d3efcf..d1a5e4f2fa9d 100644
--- a/test/CodeGen/Mips/cconv/return-float.ll
+++ b/test/CodeGen/Mips/cconv/return-float.ll
@@ -30,7 +30,7 @@ entry:
; O32-DAG: lw $2, %lo(float)([[R1]])
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(float)
; N32-DAG: lw $2, %lo(float)([[R1]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)(
; N64-DAG: lw $2, 0([[R1]])
define double @retdouble() nounwind {
@@ -44,5 +44,5 @@ entry:
; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], %lo(double)
; O32-DAG: lw $3, 4([[R2]])
; N32-DAG: ld $2, %lo(double)([[R1:\$[0-9]+]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)(
; N64-DAG: ld $2, 0([[R1]])
diff --git a/test/CodeGen/Mips/cconv/return-hard-float.ll b/test/CodeGen/Mips/cconv/return-hard-float.ll
index 3eb26fa9d24f..123b499185a9 100644
--- a/test/CodeGen/Mips/cconv/return-hard-float.ll
+++ b/test/CodeGen/Mips/cconv/return-hard-float.ll
@@ -33,7 +33,7 @@ entry:
; O32-DAG: lwc1 $f0, %lo(float)([[R1]])
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(float)
; N32-DAG: lwc1 $f0, %lo(float)([[R1]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(float)(
; N64-DAG: lwc1 $f0, 0([[R1]])
define double @retdouble() nounwind {
@@ -45,7 +45,7 @@ entry:
; ALL-LABEL: retdouble:
; O32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
; N32-DAG: ldc1 $f0, %lo(double)([[R1:\$[0-9]+]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(double)(
; N64-DAG: ldc1 $f0, 0([[R1]])
define { double, double } @retComplexDouble() #0 {
diff --git a/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
new file mode 100644
index 000000000000..2e8447710281
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+
+; Test return of {fp128} agrees with de-facto N32/N64 ABI.
+
+@struct_fp128 = global {fp128} zeroinitializer
+
+define inreg {fp128} @ret_struct_fp128() nounwind {
+entry:
+ %0 = load volatile {fp128}* @struct_fp128
+ ret {fp128} %0
+}
+
+; ALL-LABEL: ret_struct_fp128:
+
+; O32 generates different IR so we don't test it here. It returns the struct
+; indirectly.
+
+; Contrary to the N32/N64 ABI documentation, a struct containing a long double
+; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
+; match the de facto ABI as implemented by GCC.
+; N32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_fp128)
+; N32-DAG: ld [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
+; N32-DAG: dmtc1 [[R2]], $f0
+; N32-DAG: addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
+; N32-DAG: ld [[R4:\$[0-9]+]], 8([[R3]])
+; N32-DAG: dmtc1 [[R4]], $f1
+
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_fp128)($1)
+; N64-DAG: ld [[R2:\$[0-9]+]], 0([[R1]])
+; N64-DAG: dmtc1 [[R2]], $f0
+; N64-DAG: ld [[R4:\$[0-9]+]], 8([[R1]])
+; N64-DAG: dmtc1 [[R4]], $f1
diff --git a/test/CodeGen/Mips/cconv/return-struct.ll b/test/CodeGen/Mips/cconv/return-struct.ll
new file mode 100644
index 000000000000..11a8cf032148
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/return-struct.ll
@@ -0,0 +1,232 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-LE %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-LE %s
+
+; Test struct returns for all ABI's and byte orders.
+
+@struct_byte = global {i8} zeroinitializer
+@struct_2byte = global {i8,i8} zeroinitializer
+@struct_3xi16 = global {[3 x i16]} zeroinitializer
+@struct_6xi32 = global {[6 x i32]} zeroinitializer
+@struct_128xi16 = global {[128 x i16]} zeroinitializer
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+define inreg {i8} @ret_struct_i8() nounwind {
+entry:
+ %0 = load volatile {i8}* @struct_byte
+ ret {i8} %0
+}
+
+; ALL-LABEL: ret_struct_i8:
+; O32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; O32-DAG: lbu $2, %lo(struct_byte)([[R1]])
+
+; N32-LE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; N32-LE-DAG: lb $2, %lo(struct_byte)([[R1]])
+
+; N32-BE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; N32-BE-DAG: lb [[R2:\$[0-9]+]], %lo(struct_byte)([[R1]])
+; N32-BE-DAG: dsll $2, [[R2]], 56
+
+; N64-LE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_byte)($1)
+; N64-LE-DAG: lb $2, 0([[R1]])
+
+; N64-BE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_byte)($1)
+; N64-BE-DAG: lb [[R2:\$[0-9]+]], 0([[R1]])
+; N64-BE-DAG: dsll $2, [[R2]], 56
+
+; This test is based on the way clang currently lowers {i8,i8} to {i16}.
+; FIXME: It should probably work for without any lowering too but this doesn't
+; work as expected. Each member gets mapped to a register rather than
+; packed into a single register.
+define inreg {i16} @ret_struct_i16() nounwind {
+entry:
+ %retval = alloca {i8,i8}, align 1
+ %0 = bitcast {i8,i8}* %retval to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false)
+ %1 = bitcast {i8,i8}* %retval to {i16}*
+ %2 = load volatile {i16}* %1
+ ret {i16} %2
+}
+
+; ALL-LABEL: ret_struct_i16:
+; O32-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; O32-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; O32-DAG: sh [[R2]], 0([[SP:\$sp]])
+; O32-DAG: lhu $2, 0([[SP:\$sp]])
+
+; N32-LE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; N32-LE-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; N32-LE-DAG: sh [[R2]], 8([[SP:\$sp]])
+; N32-LE-DAG: lh $2, 8([[SP:\$sp]])
+
+; N32-BE-DAG: lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; N32-BE-DAG: lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; N32-BE-DAG: sh [[R2]], 8([[SP:\$sp]])
+; N32-BE-DAG: lh [[R3:\$[0-9]+]], 8([[SP:\$sp]])
+; N32-BE-DAG: dsll $2, [[R3]], 48
+
+; N64-LE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1)
+; N64-LE-DAG: lhu [[R2:\$[0-9]+]], 0([[R1]])
+; N64-LE-DAG: sh [[R2]], 8([[SP:\$sp]])
+; N64-LE-DAG: lh $2, 8([[SP:\$sp]])
+
+; N64-BE-DAG: ld [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1)
+; N64-BE-DAG: lhu [[R2:\$[0-9]+]], 0([[R1]])
+; N64-BE-DAG: sh [[R2]], 8([[SP:\$sp]])
+; N64-BE-DAG: lh [[R3:\$[0-9]+]], 8([[SP:\$sp]])
+; N64-BE-DAG: dsll $2, [[R3]], 48
+
+; Ensure that structures bigger than 32-bits but smaller than 64-bits are
+; also returned in the upper bits on big endian targets. Previously, these were
+; missed by the CCPromoteToType and the shift didn't happen.
+define inreg {i48} @ret_struct_3xi16() nounwind {
+entry:
+ %0 = load volatile i48* bitcast ({[3 x i16]}* @struct_3xi16 to i48*), align 2
+ %1 = insertvalue {i48} undef, i48 %0, 0
+ ret {i48} %1
+}
+
+; ALL-LABEL: ret_struct_3xi16:
+
+; O32-BE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; O32-BE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; O32-BE-DAG: lhu [[R1:\$[0-9]+]], 4([[PTR_LO]])
+; O32-BE-DAG: lw [[R2:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; O32-BE-DAG: sll [[R3:\$[0-9]+]], [[R2]], 16
+; O32-BE-DAG: or $3, [[R1]], [[R3]]
+; O32-BE-DAG: srl $2, [[R2]], 16
+
+; O32-LE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; O32-LE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; O32-LE-DAG: lhu $3, 4([[PTR_LO]])
+; O32-LE-DAG: lw $2, %lo(struct_3xi16)([[PTR_HI]])
+
+; N32-LE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; N32-LE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; N32-LE-DAG: lh [[R1:\$[0-9]+]], 4([[PTR_LO]])
+; N32-LE-DAG: lwu [[R2:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; N32-LE-DAG: dsll [[R3:\$[0-9]+]], [[R1]], 32
+; N32-LE-DAG: or $2, [[R2]], [[R3]]
+
+; N32-BE-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_3xi16)
+; N32-BE-DAG: addiu [[PTR_LO:\$[0-9]+]], [[PTR_HI]], %lo(struct_3xi16)
+; N32-BE-DAG: lw [[R1:\$[0-9]+]], %lo(struct_3xi16)([[PTR_HI]])
+; N32-BE-DAG: dsll [[R2:\$[0-9]+]], [[R1]], 16
+; N32-BE-DAG: lhu [[R3:\$[0-9]+]], 4([[PTR_LO]])
+; N32-BE-DAG: or [[R4:\$[0-9]+]], [[R3]], [[R2]]
+; N32-BE-DAG: dsll $2, [[R4]], 16
+
+; N64-LE-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_3xi16)($1)
+; N64-LE-DAG: lh [[R1:\$[0-9]+]], 4([[PTR]])
+; N64-LE-DAG: lwu [[R2:\$[0-9]+]], 0([[PTR]])
+; N64-LE-DAG: dsll [[R3:\$[0-9]+]], [[R1]], 32
+; N64-LE-DAG: or $2, [[R2]], [[R3]]
+
+; N64-BE-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_3xi16)($1)
+; N64-BE-DAG: lw [[R1:\$[0-9]+]], 0([[PTR]])
+; N64-BE-DAG: dsll [[R2:\$[0-9]+]], [[R1]], 16
+; N64-BE-DAG: lhu [[R3:\$[0-9]+]], 4([[PTR]])
+; N64-BE-DAG: or [[R4:\$[0-9]+]], [[R3]], [[R2]]
+; N32-BE-DAG: dsll $2, [[R4]], 16
+
+; Ensure that large structures (>128-bit) are returned indirectly.
+; We pick an extremely large structure so we don't have to match inlined memcpy's.
+define void @ret_struct_128xi16({[128 x i16]}* sret %returnval) {
+entry:
+ %0 = bitcast {[128 x i16]}* %returnval to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ({[128 x i16]}* @struct_128xi16 to i8*), i64 256, i32 2, i1 false)
+ ret void
+}
+
+; ALL-LABEL: ret_struct_128xi16:
+
+; sret pointer is already in $4
+; O32-DAG: lui [[PTR:\$[0-9]+]], %hi(struct_128xi16)
+; O32-DAG: addiu $5, [[PTR]], %lo(struct_128xi16)
+; O32: jal memcpy
+
+; sret pointer is already in $4
+; N32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_128xi16)
+; N32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_128xi16)
+; FIXME: This signext isn't necessary. Like integers, pointers are
+; but unlike integers, pointers cannot have the signext attribute.
+; N32-DAG: sll $5, [[PTR]], 0
+; N32: jal memcpy
+
+; sret pointer is already in $4
+; N64-DAG: ld $5, %got_disp(struct_128xi16)(
+; N64-DAG: ld $25, %call16(memcpy)(
+; N64: jalr $25
+
+; Ensure that large structures (>128-bit) are returned indirectly.
+; This will generate inlined memcpy's anyway so pick the smallest large
+; structure
+; This time we let the backend lower the sret argument.
+define {[6 x i32]} @ret_struct_6xi32() {
+entry:
+ %0 = load volatile {[6 x i32]}* @struct_6xi32, align 2
+ ret {[6 x i32]} %0
+}
+
+; ALL-LABEL: ret_struct_6xi32:
+
+; sret pointer is already in $4
+; O32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_6xi32)
+; O32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_6xi32)
+; O32-DAG: lw [[T0:\$[0-9]+]], %lo(struct_6xi32)([[PTR]])
+; O32-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]])
+; O32-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]])
+; O32-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]])
+; O32-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]])
+; O32-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]])
+; O32-DAG: sw [[T0]], 0($4)
+; O32-DAG: sw [[T1]], 4($4)
+; O32-DAG: sw [[T2]], 8($4)
+; O32-DAG: sw [[T3]], 12($4)
+; O32-DAG: sw [[T4]], 16($4)
+; O32-DAG: sw [[T5]], 20($4)
+
+; FIXME: This signext isn't necessary. Like integers, pointers are
+; but unlike integers, pointers cannot have the signext attribute.
+; In this case we don't have anywhere to put the signext either since
+; the sret argument is invented by the backend.
+; N32-DAG: sll [[RET_PTR:\$[0-9]+]], $4, 0
+; N32-DAG: lui [[PTR_HI:\$[0-9]+]], %hi(struct_6xi32)
+; N32-DAG: addiu [[PTR:\$[0-9]+]], [[PTR_HI]], %lo(struct_6xi32)
+; N32-DAG: lw [[T0:\$[0-9]+]], %lo(struct_6xi32)([[PTR]])
+; N32-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]])
+; N32-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]])
+; N32-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]])
+; N32-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]])
+; N32-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]])
+; N32-DAG: sw [[T0]], 0([[RET_PTR]])
+; N32-DAG: sw [[T1]], 4([[RET_PTR]])
+; N32-DAG: sw [[T2]], 8([[RET_PTR]])
+; N32-DAG: sw [[T3]], 12([[RET_PTR]])
+; N32-DAG: sw [[T4]], 16([[RET_PTR]])
+; N32-DAG: sw [[T5]], 20([[RET_PTR]])
+
+; sret pointer is already in $4
+; N64-DAG: ld [[PTR:\$[0-9]+]], %got_disp(struct_6xi32)(
+; N64-DAG: lw [[T0:\$[0-9]+]], 0([[PTR]])
+; N64-DAG: lw [[T1:\$[0-9]+]], 4([[PTR]])
+; N64-DAG: lw [[T2:\$[0-9]+]], 8([[PTR]])
+; N64-DAG: lw [[T3:\$[0-9]+]], 12([[PTR]])
+; N64-DAG: lw [[T4:\$[0-9]+]], 16([[PTR]])
+; N64-DAG: lw [[T5:\$[0-9]+]], 20([[PTR]])
+; N64-DAG: sw [[T0]], 0($4)
+; N64-DAG: sw [[T1]], 4($4)
+; N64-DAG: sw [[T2]], 8($4)
+; N64-DAG: sw [[T3]], 12($4)
+; N64-DAG: sw [[T4]], 16($4)
+; N64-DAG: sw [[T5]], 20($4)
diff --git a/test/CodeGen/Mips/cconv/return.ll b/test/CodeGen/Mips/cconv/return.ll
index 76ce5e44c4ae..63f9b5f45a18 100644
--- a/test/CodeGen/Mips/cconv/return.ll
+++ b/test/CodeGen/Mips/cconv/return.ll
@@ -33,7 +33,7 @@ entry:
; O32-DAG: lbu $2, %lo(byte)([[R1]])
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(byte)
; N32-DAG: lbu $2, %lo(byte)([[R1]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(byte)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(byte)(
; N64-DAG: lbu $2, 0([[R1]])
define i32 @reti32() nounwind {
@@ -47,7 +47,7 @@ entry:
; O32-DAG: lw $2, %lo(word)([[R1]])
; N32-DAG: lui [[R1:\$[0-9]+]], %hi(word)
; N32-DAG: lw $2, %lo(word)([[R1]])
-; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(word)($1)
+; N64-DAG: ld [[R1:\$[0-9]+]], %got_disp(word)(
; N64-DAG: lw $2, 0([[R1]])
define i64 @reti64() nounwind {
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 0c13fb1adfbe..e548049ab346 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -38,7 +38,7 @@
; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
; 64-CMP-DAG: ld $2, 0($[[T2]])
-define i32* @cmov1(i32 %s) nounwind readonly {
+define i32* @cmov1(i32 signext %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
%tmp1 = load i32** @i3, align 4
@@ -78,7 +78,7 @@ entry:
; 64-CMP-DAG: or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
; 64-CMP-DAG: lw $2, 0($[[T2]])
-define i32 @cmov2(i32 %s) nounwind readonly {
+define i32 @cmov2(i32 signext %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
%tmp1 = load i32* @c, align 4
@@ -109,7 +109,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[CC]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i32 @cmov3(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone {
entry:
%cmp = icmp eq i32 %a, 234
%cond = select i1 %cmp, i32 %b, i32 %c
@@ -142,7 +142,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[CC]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @cmov3_ne(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i32 @cmov3_ne(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone {
entry:
%cmp = icmp ne i32 %a, 234
%cond = select i1 %cmp, i32 %b, i32 %c
@@ -179,7 +179,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $6, $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone {
+define i64 @cmov4(i32 signext %a, i64 %b, i64 %c) nounwind readnone {
entry:
%cmp = icmp eq i32 %a, 234
%cond = select i1 %cmp, i64 %b, i64 %c
@@ -220,7 +220,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $6, $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i64 @cmov4_ne(i32 %a, i64 %b, i64 %c) nounwind readnone {
+define i64 @cmov4_ne(i32 signext %a, i64 %b, i64 %c) nounwind readnone {
entry:
%cmp = icmp ne i32 %a, 234
%cond = select i1 %cmp, i64 %b, i64 %c
@@ -263,7 +263,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @slti0(i32 %a) {
+define i32 @slti0(i32 signext %a) {
entry:
%cmp = icmp sgt i32 %a, 32766
%cond = select i1 %cmp, i32 3, i32 5
@@ -302,7 +302,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @slti1(i32 %a) {
+define i32 @slti1(i32 signext %a) {
entry:
%cmp = icmp sgt i32 %a, 32767
%cond = select i1 %cmp, i32 7, i32 5
@@ -337,7 +337,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @slti2(i32 %a) {
+define i32 @slti2(i32 signext %a) {
entry:
%cmp = icmp sgt i32 %a, -32769
%cond = select i1 %cmp, i32 3, i32 5
@@ -380,7 +380,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @slti3(i32 %a) {
+define i32 @slti3(i32 signext %a) {
entry:
%cmp = icmp sgt i32 %a, -32770
%cond = select i1 %cmp, i32 3, i32 5
@@ -567,7 +567,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @sltiu0(i32 %a) {
+define i32 @sltiu0(i32 signext %a) {
entry:
%cmp = icmp ugt i32 %a, 32766
%cond = select i1 %cmp, i32 3, i32 5
@@ -606,7 +606,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @sltiu1(i32 %a) {
+define i32 @sltiu1(i32 signext %a) {
entry:
%cmp = icmp ugt i32 %a, 32767
%cond = select i1 %cmp, i32 7, i32 5
@@ -641,7 +641,7 @@ entry:
; 64-CMP-DAG: selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @sltiu2(i32 %a) {
+define i32 @sltiu2(i32 signext %a) {
entry:
%cmp = icmp ugt i32 %a, -32769
%cond = select i1 %cmp, i32 3, i32 5
@@ -684,7 +684,7 @@ entry:
; 64-CMP-DAG: seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
; 64-CMP-DAG: or $2, $[[T0]], $[[T1]]
-define i32 @sltiu3(i32 %a) {
+define i32 @sltiu3(i32 signext %a) {
entry:
%cmp = icmp ugt i32 %a, -32770
%cond = select i1 %cmp, i32 3, i32 5
@@ -697,7 +697,7 @@ entry:
; doesn't generate conditional moves
; for constant operands whose difference is |1|
-define i32 @slti4(i32 %a) nounwind readnone {
+define i32 @slti4(i32 signext %a) nounwind readnone {
%1 = icmp slt i32 %a, 7
%2 = select i1 %1, i32 4, i32 3
ret i32 %2
@@ -723,7 +723,7 @@ define i32 @slti4(i32 %a) nounwind readnone {
; 64-CMP-NOT: seleqz
; 64-CMP-NOT: selnez
-define i32 @slti5(i32 %a) nounwind readnone {
+define i32 @slti5(i32 signext %a) nounwind readnone {
%1 = icmp slt i32 %a, 7
%2 = select i1 %1, i32 -3, i32 -4
ret i32 %2
@@ -749,7 +749,7 @@ define i32 @slti5(i32 %a) nounwind readnone {
; 64-CMP-NOT: seleqz
; 64-CMP-NOT: selnez
-define i32 @slti6(i32 %a) nounwind readnone {
+define i32 @slti6(i32 signext %a) nounwind readnone {
%1 = icmp slt i32 %a, 7
%2 = select i1 %1, i32 3, i32 4
ret i32 %2
diff --git a/test/CodeGen/Mips/const-mult.ll b/test/CodeGen/Mips/const-mult.ll
index 186202141dcb..60b2a88196bd 100644
--- a/test/CodeGen/Mips/const-mult.ll
+++ b/test/CodeGen/Mips/const-mult.ll
@@ -5,7 +5,7 @@
; CHECK: sll $[[R0:[0-9]+]], $4, 2
; CHECK: addu ${{[0-9]+}}, $[[R0]], $4
-define i32 @mul5_32(i32 %a) {
+define i32 @mul5_32(i32 signext %a) {
entry:
%mul = mul nsw i32 %a, 5
ret i32 %mul
@@ -17,7 +17,7 @@ entry:
; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 5
; CHECK: subu ${{[0-9]+}}, $[[R2]], $[[R1]]
-define i32 @mul27_32(i32 %a) {
+define i32 @mul27_32(i32 signext %a) {
entry:
%mul = mul nsw i32 %a, 27
ret i32 %mul
@@ -29,7 +29,7 @@ entry:
; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 31
; CHECK: addu ${{[0-9]+}}, $[[R2]], $[[R1]]
-define i32 @muln2147483643_32(i32 %a) {
+define i32 @muln2147483643_32(i32 signext %a) {
entry:
%mul = mul nsw i32 %a, -2147483643
ret i32 %mul
@@ -41,7 +41,7 @@ entry:
; CHECK64-DAG: dsll $[[R2:[0-9]+]], $4, 63
; CHECK64: daddu ${{[0-9]+}}, $[[R2]], $[[R1]]
-define i64 @muln9223372036854775805_64(i64 %a) {
+define i64 @muln9223372036854775805_64(i64 signext %a) {
entry:
%mul = mul nsw i64 %a, -9223372036854775805
ret i64 %mul
diff --git a/test/CodeGen/Mips/countleading.ll b/test/CodeGen/Mips/countleading.ll
index 6e63cff123cf..b7aad049e8ab 100644
--- a/test/CodeGen/Mips/countleading.ll
+++ b/test/CodeGen/Mips/countleading.ll
@@ -11,7 +11,7 @@
; MIPS32-GT-R1 - MIPS64r1 and above (does not include MIPS64's)
; MIPS64-GT-R1 - MIPS64r1 and above
-define i32 @ctlz_i32(i32 %X) nounwind readnone {
+define i32 @ctlz_i32(i32 signext %X) nounwind readnone {
entry:
; ALL-LABEL: ctlz_i32:
@@ -27,7 +27,7 @@ entry:
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
-define i32 @ctlo_i32(i32 %X) nounwind readnone {
+define i32 @ctlo_i32(i32 signext %X) nounwind readnone {
entry:
; ALL-LABEL: ctlo_i32:
diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll
index 97f836044406..a9cfe0fa1523 100644
--- a/test/CodeGen/Mips/divrem.ll
+++ b/test/CodeGen/Mips/divrem.ll
@@ -27,7 +27,7 @@
@g0 = common global i32 0, align 4
@g1 = common global i32 0, align 4
-define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @sdiv1(i32 signext %a0, i32 signext %a1) nounwind readnone {
entry:
; ALL-LABEL: sdiv1:
@@ -54,7 +54,7 @@ entry:
ret i32 %div
}
-define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @srem1(i32 signext %a0, i32 signext %a1) nounwind readnone {
entry:
; ALL-LABEL: srem1:
@@ -81,7 +81,7 @@ entry:
ret i32 %rem
}
-define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @udiv1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
entry:
; ALL-LABEL: udiv1:
@@ -107,7 +107,7 @@ entry:
ret i32 %div
}
-define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @urem1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
entry:
; ALL-LABEL: urem1:
@@ -134,7 +134,7 @@ entry:
ret i32 %rem
}
-define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+define i32 @sdivrem1(i32 signext %a0, i32 signext %a1, i32* nocapture %r) nounwind {
entry:
; ALL-LABEL: sdivrem1:
@@ -175,7 +175,7 @@ entry:
ret i32 %div
}
-define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+define i32 @udivrem1(i32 zeroext %a0, i32 zeroext %a1, i32* nocapture %r) nounwind {
entry:
; ALL-LABEL: udivrem1:
diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll
index e78497a9521e..b4efb40b6422 100644
--- a/test/CodeGen/Mips/ehframe-indirect.ll
+++ b/test/CodeGen/Mips/ehframe-indirect.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck %s
-; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck -check-prefix=CHECK32 %s
+; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck -check-prefix=CHECK32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu < %s | FileCheck -check-prefix=CHECK64 %s
+; RUN: llc -mtriple=mips64el-linux-android < %s | FileCheck -check-prefix=CHECK64 %s
define i32 @main() {
; CHECK: .cfi_startproc
@@ -27,8 +29,11 @@ declare void @foo()
; CHECK: .hidden DW.ref.__gxx_personality_v0
; CHECK: .weak DW.ref.__gxx_personality_v0
; CHECK: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
-; CHECK: .align 2
+; CHECK32: .align 2
+; CHECK64: .align 3
; CHECK: .type DW.ref.__gxx_personality_v0,@object
-; CHECK: .size DW.ref.__gxx_personality_v0, 4
+; CHECK32: .size DW.ref.__gxx_personality_v0, 4
+; CHECK64: .size DW.ref.__gxx_personality_v0, 8
; CHECK: DW.ref.__gxx_personality_v0:
-; CHECK: .4byte __gxx_personality_v0
+; CHECK32: .4byte __gxx_personality_v0
+; CHECK64: .8byte __gxx_personality_v0
diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll
index 822902c27d2f..6b022c5e36d9 100644
--- a/test/CodeGen/Mips/fastcc.ll
+++ b/test/CodeGen/Mips/fastcc.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=mipsel-none-nacl-gnu \
; RUN: | FileCheck %s -check-prefix=CHECK-NACL
; RUN: llc < %s -march=mipsel -mcpu=mips32 -mattr=+nooddspreg | FileCheck %s -check-prefix=NOODDSPREG
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -mattr=+fp64,+nooddspreg | FileCheck %s -check-prefix=FP64-NOODDSPREG
@gi0 = external global i32
@@ -82,6 +83,7 @@
@g16 = external global i32
@fa = common global [11 x float] zeroinitializer, align 4
+@da = common global [11 x double] zeroinitializer, align 8
define void @caller0() nounwind {
entry:
@@ -270,7 +272,7 @@ entry:
define void @caller2() {
entry:
-; NOODDSPREG-LABEL: caller2
+; NOODDSPREG-LABEL: caller2:
; Check that first 10 arguments are passed in even float registers
; f0, f2, ... , f18. Check that 11th argument is passed on stack.
@@ -312,7 +314,7 @@ define fastcc void @callee2(float %a0, float %a1, float %a2, float %a3,
float %a8, float %a9, float %a10) {
entry:
-; NOODDSPREG-LABEL: callee2
+; NOODDSPREG-LABEL: callee2:
; NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]]
@@ -348,3 +350,83 @@ entry:
ret void
}
+define void @caller3() {
+entry:
+
+; FP64-NOODDSPREG-LABEL: caller3:
+
+; Check that first 10 arguments are passed in even float registers
+; f0, f2, ... , f18. Check that 11th argument is passed on stack.
+
+; FP64-NOODDSPREG-DAG: lw $[[R0:[0-9]+]], %got(da)(${{[0-9]+|gp}})
+; FP64-NOODDSPREG-DAG: ldc1 $f0, 0($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f2, 8($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f4, 16($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f6, 24($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f8, 32($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f10, 40($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f12, 48($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f14, 56($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f16, 64($[[R0]])
+; FP64-NOODDSPREG-DAG: ldc1 $f18, 72($[[R0]])
+
+; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], 80($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 0($sp)
+
+ %0 = load double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
+ %1 = load double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
+ %2 = load double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
+ %3 = load double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
+ %4 = load double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
+ %5 = load double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
+ %6 = load double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
+ %7 = load double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
+ %8 = load double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
+ %9 = load double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
+ %10 = load double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+ tail call fastcc void @callee3(double %0, double %1, double %2, double %3,
+ double %4, double %5, double %6, double %7,
+ double %8, double %9, double %10)
+ ret void
+}
+
+define fastcc void @callee3(double %a0, double %a1, double %a2, double %a3,
+ double %a4, double %a5, double %a6, double %a7,
+ double %a8, double %a9, double %a10) {
+entry:
+
+; FP64-NOODDSPREG-LABEL: callee3:
+
+; FP64-NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]]
+
+; Check that first 10 arguments are received in even float registers
+; f0, f2, ... , f18. Check that 11th argument is received on stack.
+
+; FP64-NOODDSPREG-DAG: lw $[[R0:[0-9]+]], %got(da)(${{[0-9]+|gp}})
+; FP64-NOODDSPREG-DAG: sdc1 $f0, 0($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f2, 8($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f4, 16($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f6, 24($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f8, 32($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f10, 40($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f12, 48($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f14, 56($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f16, 64($[[R0]])
+; FP64-NOODDSPREG-DAG: sdc1 $f18, 72($[[R0]])
+
+; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
+; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 80($[[R0]])
+
+ store double %a0, double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
+ store double %a1, double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
+ store double %a2, double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
+ store double %a3, double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
+ store double %a4, double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
+ store double %a5, double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
+ store double %a6, double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
+ store double %a7, double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
+ store double %a8, double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
+ store double %a9, double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
+ store double %a10, double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+ ret void
+}
diff --git a/test/CodeGen/Mips/fp64a.ll b/test/CodeGen/Mips/fp64a.ll
index 5c2c87373a32..fadce5cb748b 100644
--- a/test/CodeGen/Mips/fp64a.ll
+++ b/test/CodeGen/Mips/fp64a.ll
@@ -12,9 +12,9 @@
; this check here.
; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-BE
-; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-BE
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A
; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-LE
-; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-LE
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A
; RUN: llc -march=mips64 -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A
; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A
@@ -38,15 +38,10 @@ define double @call1(double %d, ...) {
; 32R2-NO-FP64A-BE: mtc1 $5, $f0
; 32R2-NO-FP64A-BE: mthc1 $4, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $4, 0($sp)
-; 32R2-FP64A-LE: sw $5, 4($sp)
-; 32R2-FP64A-LE: ldc1 $f0, 0($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $5, 0($sp)
-; 32R2-FP64A-BE: sw $4, 4($sp)
-; 32R2-FP64A-BE: ldc1 $f0, 0($sp)
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $4, 0($sp)
+; 32R2-FP64A: sw $5, 4($sp)
+; 32R2-FP64A: ldc1 $f0, 0($sp)
; 64-NO-FP64A: daddiu $sp, $sp, -64
; 64-NO-FP64A: mov.d $f0, $f12
@@ -63,15 +58,10 @@ define double @call2(i32 %i, double %d) {
; 32R2-NO-FP64A-BE: mtc1 $7, $f0
; 32R2-NO-FP64A-BE: mthc1 $6, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $6, 0($sp)
-; 32R2-FP64A-LE: sw $7, 4($sp)
-; 32R2-FP64A-LE: ldc1 $f0, 0($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $7, 0($sp)
-; 32R2-FP64A-BE: sw $6, 4($sp)
-; 32R2-FP64A-BE: ldc1 $f0, 0($sp)
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $6, 0($sp)
+; 32R2-FP64A: sw $7, 4($sp)
+; 32R2-FP64A: ldc1 $f0, 0($sp)
; 64-NO-FP64A-NOT: daddiu $sp, $sp
; 64-NO-FP64A: mov.d $f0, $f13
@@ -88,15 +78,10 @@ define double @call3(float %f1, float %f2, double %d) {
; 32R2-NO-FP64A-BE: mtc1 $7, $f0
; 32R2-NO-FP64A-BE: mthc1 $6, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $6, 0($sp)
-; 32R2-FP64A-LE: sw $7, 4($sp)
-; 32R2-FP64A-LE: ldc1 $f0, 0($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $7, 0($sp)
-; 32R2-FP64A-BE: sw $6, 4($sp)
-; 32R2-FP64A-BE: ldc1 $f0, 0($sp)
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $6, 0($sp)
+; 32R2-FP64A: sw $7, 4($sp)
+; 32R2-FP64A: ldc1 $f0, 0($sp)
; 64-NO-FP64A-NOT: daddiu $sp, $sp
; 64-NO-FP64A: mov.d $f0, $f14
@@ -113,15 +98,10 @@ define double @call4(float %f, double %d, ...) {
; 32R2-NO-FP64A-BE: mtc1 $7, $f0
; 32R2-NO-FP64A-BE: mthc1 $6, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $6, 0($sp)
-; 32R2-FP64A-LE: sw $7, 4($sp)
-; 32R2-FP64A-LE: ldc1 $f0, 0($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $7, 0($sp)
-; 32R2-FP64A-BE: sw $6, 4($sp)
-; 32R2-FP64A-BE: ldc1 $f0, 0($sp)
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $6, 0($sp)
+; 32R2-FP64A: sw $7, 4($sp)
+; 32R2-FP64A: ldc1 $f0, 0($sp)
; 64-NO-FP64A: daddiu $sp, $sp, -48
; 64-NO-FP64A: mov.d $f0, $f13
@@ -145,23 +125,14 @@ define double @call5(double %a, double %b, ...) {
; 32R2-NO-FP64A-BE-DAG: mthc1 $6, $[[T1:f[0-9]+]]
; 32R2-NO-FP64A-BE: sub.d $f0, $[[T0]], $[[T1]]
-; 32R2-FP64A-LE: addiu $sp, $sp, -8
-; 32R2-FP64A-LE: sw $6, 0($sp)
-; 32R2-FP64A-LE: sw $7, 4($sp)
-; 32R2-FP64A-LE: ldc1 $[[T1:f[0-9]+]], 0($sp)
-; 32R2-FP64A-LE: sw $4, 0($sp)
-; 32R2-FP64A-LE: sw $5, 4($sp)
-; 32R2-FP64A-LE: ldc1 $[[T0:f[0-9]+]], 0($sp)
-; 32R2-FP64A-LE: sub.d $f0, $[[T0]], $[[T1]]
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -8
-; 32R2-FP64A-BE: sw $7, 0($sp)
-; 32R2-FP64A-BE: sw $6, 4($sp)
-; 32R2-FP64A-BE: ldc1 $[[T1:f[0-9]+]], 0($sp)
-; 32R2-FP64A-BE: sw $5, 0($sp)
-; 32R2-FP64A-BE: sw $4, 4($sp)
-; 32R2-FP64A-BE: ldc1 $[[T0:f[0-9]+]], 0($sp)
-; 32R2-FP64A-BE: sub.d $f0, $[[T0]], $[[T1]]
+; 32R2-FP64A: addiu $sp, $sp, -8
+; 32R2-FP64A: sw $6, 0($sp)
+; 32R2-FP64A: sw $7, 4($sp)
+; 32R2-FP64A: ldc1 $[[T1:f[0-9]+]], 0($sp)
+; 32R2-FP64A: sw $4, 0($sp)
+; 32R2-FP64A: sw $5, 4($sp)
+; 32R2-FP64A: ldc1 $[[T0:f[0-9]+]], 0($sp)
+; 32R2-FP64A: sub.d $f0, $[[T0]], $[[T1]]
; 64-NO-FP64A: sub.d $f0, $f12, $f13
}
@@ -179,19 +150,12 @@ define double @move_from(double %d) {
; 32R2-NO-FP64A-BE-DAG: mfc1 $7, $f0
; 32R2-NO-FP64A-BE-DAG: mfhc1 $6, $f0
-; 32R2-FP64A-LE: addiu $sp, $sp, -32
-; 32R2-FP64A-LE: sdc1 $f0, 16($sp)
-; 32R2-FP64A-LE: lw $6, 16($sp)
-; FIXME: This store is redundant
-; 32R2-FP64A-LE: sdc1 $f0, 16($sp)
-; 32R2-FP64A-LE: lw $7, 20($sp)
-
-; 32R2-FP64A-BE: addiu $sp, $sp, -32
-; 32R2-FP64A-BE: sdc1 $f0, 16($sp)
-; 32R2-FP64A-BE: lw $6, 20($sp)
+; 32R2-FP64A: addiu $sp, $sp, -32
+; 32R2-FP64A: sdc1 $f0, 16($sp)
+; 32R2-FP64A: lw $6, 16($sp)
; FIXME: This store is redundant
-; 32R2-FP64A-BE: sdc1 $f0, 16($sp)
-; 32R2-FP64A-BE: lw $7, 16($sp)
+; 32R2-FP64A: sdc1 $f0, 16($sp)
+; 32R2-FP64A: lw $7, 20($sp)
; 64-NO-FP64A: mov.d $f13, $f0
}
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
index 6512851a11be..3d9dec76fb37 100644
--- a/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -65,6 +65,33 @@ entry:
;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},$0
;CHECK_LITTLE_32: #NO_APP
tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
+
+; z with non-zero and the "r"(register) and "J"(integer zero) constraints
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+ call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 7) nounwind
+
+; z with zero and the "r"(register) and "J"(integer zero) constraints
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: mtc0 $0, ${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+ call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 0) nounwind
+
+; z with non-zero and just the "r"(register) constraint
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+ call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 7) nounwind
+
+; z with zero and just the "r"(register) constraint
+; FIXME: Check for $0, instead of other registers.
+; We should be using $0 directly in this case, not real registers.
+; When the materialization of 0 gets fixed, this test will fail.
+;CHECK_LITTLE_32: #APP
+;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+;CHECK_LITTLE_32: #NO_APP
+ call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 0) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll
index a3f5ebfb5460..f6d0e8debb36 100644
--- a/test/CodeGen/Mips/load-store-left-right.ll
+++ b/test/CodeGen/Mips/load-store-left-right.ll
@@ -47,7 +47,7 @@ entry:
ret i32 %0
}
-define void @store_SI(i32 %a) nounwind {
+define void @store_SI(i32 signext %a) nounwind {
entry:
; ALL-LABEL: store_SI:
@@ -201,7 +201,7 @@ entry:
ret void
}
-define void @store_SI_trunc_from_i64(i32 %a) nounwind {
+define void @store_SI_trunc_from_i64(i32 signext %a) nounwind {
entry:
; ALL-LABEL: store_SI_trunc_from_i64:
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index a403744c8fd5..b9b52be01dad 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -13,7 +13,7 @@
@x = external global i32
-define void @test1(i32 %s) {
+define void @test1(i32 signext %s) {
entry:
%cmp = icmp eq i32 %s, 0
br i1 %cmp, label %end, label %then
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index 82229677ff11..b0c3ff6ff9b5 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -76,26 +76,14 @@ entry:
; 32R6-DAG: muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
; 32R6-DAG: addu $2, $[[T3]], $[[T2]]
-; 64-DAG: dsll $[[T0:[0-9]+]], $4, 32
-; 64-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64-DAG: dsll $[[T2:[0-9]+]], $5, 32
-; 64-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64-DAG: d[[m:m]]ult $[[T3]], $[[T1]]
-; 64-DAG: [[m]]flo $[[T4:[0-9]+]]
-; 64-DAG: dsll $[[T5:[0-9]+]], $6, 32
-; 64-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64-DAG: daddu $2, $[[T4]], $[[T6]]
-
-; 64R6-DAG: dsll $[[T0:[0-9]+]], $4, 32
-; 64R6-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64R6-DAG: dsll $[[T2:[0-9]+]], $5, 32
-; 64R6-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64R6-DAG: dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
-; 64R6-DAG: dsll $[[T5:[0-9]+]], $6, 32
-; 64R6-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64R6-DAG: daddu $2, $[[T4]], $[[T6]]
-
-define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+; 64-DAG: d[[m:m]]ult $5, $4
+; 64-DAG: [[m]]flo $[[T0:[0-9]+]]
+; 64-DAG: daddu $2, $[[T0]], $6
+
+; 64R6-DAG: dmul $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG: daddu $2, $[[T0]], $6
+
+define i64 @madd2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
entry:
%conv = zext i32 %a to i64
%conv2 = zext i32 %b to i64
@@ -214,26 +202,14 @@ entry:
; 32R6-DAG: negu $2, $[[T3]]
; 32R6-DAG: subu $3, $6, $[[T1]]
-; 64-DAG: dsll $[[T0:[0-9]+]], $4, 32
-; 64-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64-DAG: dsll $[[T2:[0-9]+]], $5, 32
-; 64-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64-DAG: d[[m:m]]ult $[[T3]], $[[T1]]
-; 64-DAG: [[m]]flo $[[T4:[0-9]+]]
-; 64-DAG: dsll $[[T5:[0-9]+]], $6, 32
-; 64-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64-DAG: dsubu $2, $[[T6]], $[[T4]]
-
-; 64R6-DAG: dsll $[[T0:[0-9]+]], $4, 32
-; 64R6-DAG: dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64R6-DAG: dsll $[[T2:[0-9]+]], $5, 32
-; 64R6-DAG: dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64R6-DAG: dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
-; 64R6-DAG: dsll $[[T5:[0-9]+]], $6, 32
-; 64R6-DAG: dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64R6-DAG: dsubu $2, $[[T6]], $[[T4]]
-
-define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+; 64-DAG: d[[m:m]]ult $5, $4
+; 64-DAG: [[m]]flo $[[T0:[0-9]+]]
+; 64-DAG: dsubu $2, $6, $[[T0]]
+
+; 64R6-DAG: dmul $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG: dsubu $2, $6, $[[T0]]
+
+define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
entry:
%conv = zext i32 %c to i64
%conv2 = zext i32 %a to i64
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index 7f7d515d690e..f0cbbd08d79a 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -114,7 +114,7 @@ entry:
; ALL-LABEL: conv_LD_UInt:
; ALL: ld $25, %call16(__floatunsitf)
-define fp128 @conv_LD_UInt(i32 %a) {
+define fp128 @conv_LD_UInt(i32 signext %a) {
entry:
%conv = uitofp i32 %a to fp128
ret fp128 %conv
@@ -635,7 +635,7 @@ entry:
; CMP_CC_FMT-DAG: selnez $[[NE2:[0-9]+]], $7, $[[CC]]
; CMP_CC_FMT-DAG: or $4, $[[NE2]], $[[EQ2]]
-define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) {
+define fp128 @select_LD(i32 signext %a, i64, fp128 %b, fp128 %c) {
entry:
%tobool = icmp ne i32 %a, 0
%cond = select i1 %tobool, fp128 %b, fp128 %c
diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll
index 7a52c3d41d69..ed494e965b7d 100644
--- a/test/CodeGen/Mips/mips64-sret.ll
+++ b/test/CodeGen/Mips/mips64-sret.ll
@@ -11,7 +11,7 @@ entry:
ret void
}
-define void @bar(i32 %v, i32* noalias sret %agg.result) nounwind {
+define void @bar(i32 signext %v, i32* noalias sret %agg.result) nounwind {
entry:
; CHECK-LABEL: bar:
; CHECK: sw $4, 0($5)
diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll
index 07e67bf04287..ebec465a3e33 100644
--- a/test/CodeGen/Mips/msa/frameindex.ll
+++ b/test/CodeGen/Mips/msa/frameindex.ll
@@ -36,10 +36,10 @@ define void @loadstore_v16i8_just_over_simm10() nounwind {
%2 = alloca [497 x i8] ; Push the frame just over 512 bytes
%3 = load volatile <16 x i8>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
; MIPS32-AE: st.b [[R1]], 0([[BASE]])
ret void
@@ -53,12 +53,12 @@ define void @loadstore_v16i8_just_under_simm16() nounwind {
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
%3 = load volatile <16 x i8>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.b [[R1]], 0([[BASE]])
ret void
@@ -72,12 +72,12 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
%3 = load volatile <16 x i8>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.b [[R1]], 0([[BASE]])
ret void
@@ -107,10 +107,10 @@ define void @loadstore_v8i16_unaligned() nounwind {
%5 = getelementptr [2 x <8 x i16>]* %4, i32 0, i32 0
%6 = load volatile <8 x i16>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %6, <8 x i16>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
ret void
@@ -139,10 +139,10 @@ define void @loadstore_v8i16_just_over_simm10() nounwind {
%2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes
%3 = load volatile <8 x i16>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %3, <8 x i16>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
ret void
@@ -156,12 +156,12 @@ define void @loadstore_v8i16_just_under_simm16() nounwind {
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
%3 = load volatile <8 x i16>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %3, <8 x i16>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
ret void
@@ -175,12 +175,12 @@ define void @loadstore_v8i16_just_over_simm16() nounwind {
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
%3 = load volatile <8 x i16>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %3, <8 x i16>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.h [[R1]], 0([[BASE]])
ret void
@@ -210,10 +210,10 @@ define void @loadstore_v4i32_unaligned() nounwind {
%5 = getelementptr [2 x <4 x i32>]* %4, i32 0, i32 0
%6 = load volatile <4 x i32>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %6, <4 x i32>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
ret void
@@ -242,10 +242,10 @@ define void @loadstore_v4i32_just_over_simm10() nounwind {
%2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes
%3 = load volatile <4 x i32>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %3, <4 x i32>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
ret void
@@ -259,12 +259,12 @@ define void @loadstore_v4i32_just_under_simm16() nounwind {
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
%3 = load volatile <4 x i32>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %3, <4 x i32>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
ret void
@@ -278,12 +278,12 @@ define void @loadstore_v4i32_just_over_simm16() nounwind {
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
%3 = load volatile <4 x i32>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %3, <4 x i32>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.w [[R1]], 0([[BASE]])
ret void
@@ -313,10 +313,10 @@ define void @loadstore_v2i64_unaligned() nounwind {
%5 = getelementptr [2 x <2 x i64>]* %4, i32 0, i32 0
%6 = load volatile <2 x i64>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %6, <2 x i64>* %5
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
ret void
@@ -345,10 +345,10 @@ define void @loadstore_v2i64_just_over_simm10() nounwind {
%2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes
%3 = load volatile <2 x i64>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %3, <2 x i64>* %1
- ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096
+ ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
ret void
@@ -362,12 +362,12 @@ define void @loadstore_v2i64_just_under_simm16() nounwind {
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
%3 = load volatile <2 x i64>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %3, <2 x i64>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
ret void
@@ -381,12 +381,12 @@ define void @loadstore_v2i64_just_over_simm16() nounwind {
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
%3 = load volatile <2 x i64>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %3, <2 x i64>* %1
- ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
- ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
+ ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
+ ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: st.d [[R1]], 0([[BASE]])
ret void
diff --git a/test/CodeGen/Mips/octeon_popcnt.ll b/test/CodeGen/Mips/octeon_popcnt.ll
index 52c37f69d020..3432b3992984 100644
--- a/test/CodeGen/Mips/octeon_popcnt.ll
+++ b/test/CodeGen/Mips/octeon_popcnt.ll
@@ -6,7 +6,7 @@ define i8 @cnt8(i8 %x) nounwind readnone {
ret i8 %cnt
; OCTEON-LABEL: cnt8:
; OCTEON: jr $ra
-; OCTEON: pop $2, $1
+; OCTEON: pop $2, [[R1:\$[0-9]+]]
; MIPS64-LABEL: cnt8:
; MIPS64-NOT: pop
}
@@ -16,12 +16,12 @@ define i16 @cnt16(i16 %x) nounwind readnone {
ret i16 %cnt
; OCTEON-LABEL: cnt16:
; OCTEON: jr $ra
-; OCTEON: pop $2, $1
+; OCTEON: pop $2, [[R1:\$[0-9]+]]
; MIPS64-LABEL: cnt16:
; MIPS64-NOT: pop
}
-define i32 @cnt32(i32 %x) nounwind readnone {
+define i32 @cnt32(i32 zeroext %x) nounwind readnone {
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
; OCTEON-LABEL: cnt32:
diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll
index eb2198b36dff..d6e1826c30c8 100644
--- a/test/CodeGen/Mips/select.ll
+++ b/test/CodeGen/Mips/select.ll
@@ -8,7 +8,7 @@
@d2 = external global double
@d3 = external global double
-define i32 @i32_icmp_ne_i32_val(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
+define i32 @i32_icmp_ne_i32_val(i32 signext %s, i32 signext %f0, i32 signext %f1) nounwind readnone {
entry:
; ALL-LABEL: i32_icmp_ne_i32_val:
@@ -37,7 +37,7 @@ entry:
ret i32 %cond
}
-define i64 @i32_icmp_ne_i64_val(i32 %s, i64 %f0, i64 %f1) nounwind readnone {
+define i64 @i32_icmp_ne_i64_val(i32 signext %s, i64 %f0, i64 %f1) nounwind readnone {
entry:
; ALL-LABEL: i32_icmp_ne_i64_val:
@@ -128,7 +128,7 @@ entry:
ret i64 %cond
}
-define float @i32_icmp_ne_f32_val(i32 %s, float %f0, float %f1) nounwind readnone {
+define float @i32_icmp_ne_f32_val(i32 signext %s, float %f0, float %f1) nounwind readnone {
entry:
; ALL-LABEL: i32_icmp_ne_f32_val:
@@ -161,7 +161,7 @@ entry:
ret float %cond
}
-define double @i32_icmp_ne_f64_val(i32 %s, double %f0, double %f1) nounwind readnone {
+define double @i32_icmp_ne_f64_val(i32 signext %s, double %f0, double %f1) nounwind readnone {
entry:
; ALL-LABEL: i32_icmp_ne_f64_val:
@@ -496,7 +496,7 @@ entry:
ret float %cond
}
-define i32 @f32_fcmp_oeq_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
entry:
; ALL-LABEL: f32_fcmp_oeq_i32_val:
@@ -541,7 +541,7 @@ entry:
ret i32 %cond
}
-define i32 @f32_fcmp_olt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
entry:
; ALL-LABEL: f32_fcmp_olt_i32_val:
@@ -585,7 +585,7 @@ entry:
ret i32 %cond
}
-define i32 @f32_fcmp_ogt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
entry:
; ALL-LABEL: f32_fcmp_ogt_i32_val:
@@ -630,7 +630,7 @@ entry:
ret i32 %cond
}
-define i32 @f64_fcmp_oeq_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
entry:
; ALL-LABEL: f64_fcmp_oeq_i32_val:
@@ -707,7 +707,7 @@ entry:
ret i32 %cond
}
-define i32 @f64_fcmp_olt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
entry:
; ALL-LABEL: f64_fcmp_olt_i32_val:
@@ -784,7 +784,7 @@ entry:
ret i32 %cond
}
-define i32 @f64_fcmp_ogt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
entry:
; ALL-LABEL: f64_fcmp_ogt_i32_val:
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
index a1b6cb0322b1..c766d3b3cc2a 100644
--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -8,7 +8,7 @@
@g1 = external global i32
-define i32 @sel_icmp_nez_i32_z0(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z0(i32 signext %s) nounwind readonly {
entry:
; ALL-LABEL: sel_icmp_nez_i32_z0:
@@ -30,7 +30,7 @@ entry:
ret i32 %cond
}
-define i32 @sel_icmp_nez_i32_z1(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z1(i32 signext %s) nounwind readonly {
entry:
; ALL-LABEL: sel_icmp_nez_i32_z1:
diff --git a/test/CodeGen/PowerPC/blockaddress.ll b/test/CodeGen/PowerPC/blockaddress.ll
new file mode 100644
index 000000000000..c1981e21fff4
--- /dev/null
+++ b/test/CodeGen/PowerPC/blockaddress.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -code-model=small -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=SMALL
+; RUN: llc < %s -code-model=medium -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=large -march=ppc64 -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=small -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=SMALL
+; RUN: llc < %s -code-model=medium -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+; RUN: llc < %s -code-model=large -march=ppc64 -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=MEDIUM
+
+define i8* @test() {
+entry:
+ br label %here
+
+here: ; preds = %entry
+; MEDIUM: .Ltmp[[TMP0:[0-9]+]]:
+; MEDIUM: addis [[R0:[0-9]+]], 2, .LC[[LC0:[0-9]+]]@toc@ha
+; MEDIUM: ld 3, .LC[[LC0]]@toc@l([[R0]])
+; MEDIUM: blr
+; MEDIUM: .LC[[LC0]]:
+; MEDIUM: .tc .Ltmp[[TMP0]][TC],.Ltmp[[TMP0]]
+; SMALL: .Ltmp[[TMP0:[0-9]+]]:
+; SMALL: ld 3, .LC[[LC0:[0-9]+]]@toc(2)
+; SMALL: blr
+; SMALL: .LC[[LC0]]:
+; SMALL: .tc .Ltmp[[TMP0]][TC],.Ltmp[[TMP0]]
+ ret i8* blockaddress(@test, %here)
+}
+
diff --git a/test/CodeGen/PowerPC/cc.ll b/test/CodeGen/PowerPC/cc.ll
index f92121bd7202..c23ee7c9f5c8 100644
--- a/test/CodeGen/PowerPC/cc.ll
+++ b/test/CodeGen/PowerPC/cc.ll
@@ -41,7 +41,7 @@ entry:
br label %foo
foo:
- call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a)
+ call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a)
br i1 %c, label %bar, label %end
bar:
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll
index 5e00675c0398..71611060ed7a 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=ELF64LE
; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970
;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
@@ -9,12 +10,16 @@
define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i64
+; ELF64LE: sitofp_single_i64
; PPC970: sitofp_single_i64
%b.addr = alloca float, align 4
%conv = sitofp i64 %a to float
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -26,12 +31,20 @@ entry:
define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i32
+; ELF64LE: sitofp_single_i32
; PPC970: sitofp_single_i32
%b.addr = alloca float, align 4
%conv = sitofp i32 %a to float
; ELF64: std
+; stack offset used to load the float: 65524 = -16 + 4
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwax
; ELF64: fcfids
+; ELF64LE: std
+; stack offset used to load the float: 65520 = -16 + 0
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwax
+; ELF64LE: fcfids
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -43,6 +56,7 @@ entry:
define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i16
+; ELF64LE: sitofp_single_i16
; PPC970: sitofp_single_i16
%b.addr = alloca float, align 4
%conv = sitofp i16 %a to float
@@ -50,6 +64,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; ELF64LE: extsh
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
; PPC970: extsh
; PPC970: std
; PPC970: lfd
@@ -62,6 +80,7 @@ entry:
define void @sitofp_single_i8(i8 %a) nounwind ssp {
entry:
; ELF64: sitofp_single_i8
+; ELF64LE: sitofp_single_i8
; PPC970: sitofp_single_i8
%b.addr = alloca float, align 4
%conv = sitofp i8 %a to float
@@ -69,6 +88,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; ELF64LE: extsb
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfids
; PPC970: extsb
; PPC970: std
; PPC970: lfd
@@ -81,12 +104,20 @@ entry:
define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i32
+; ELF64LE: sitofp_double_i32
; PPC970: sitofp_double_i32
%b.addr = alloca double, align 8
%conv = sitofp i32 %a to double
; ELF64: std
+; stack offset used to load the float: 65524 = -16 + 4
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwax
; ELF64: fcfid
+; ELF64LE: std
+; stack offset used to load the float: 65520 = -16 + 0
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwax
+; ELF64LE: fcfid
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -97,12 +128,16 @@ entry:
define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i64
+; ELF64LE: sitofp_double_i64
; PPC970: sitofp_double_i64
%b.addr = alloca double, align 8
%conv = sitofp i64 %a to double
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -113,6 +148,7 @@ entry:
define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i16
+; ELF64LE: sitofp_double_i16
; PPC970: sitofp_double_i16
%b.addr = alloca double, align 8
%conv = sitofp i16 %a to double
@@ -120,6 +156,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; ELF64LE: extsh
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
; PPC970: extsh
; PPC970: std
; PPC970: lfd
@@ -131,6 +171,7 @@ entry:
define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i8
+; ELF64LE: sitofp_double_i8
; PPC970: sitofp_double_i8
%b.addr = alloca double, align 8
%conv = sitofp i8 %a to double
@@ -138,6 +179,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; ELF64LE: extsb
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfid
; PPC970: extsb
; PPC970: std
; PPC970: lfd
@@ -151,12 +196,16 @@ entry:
define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i64
+; ELF64LE: uitofp_single_i64
; PPC970: uitofp_single_i64
%b.addr = alloca float, align 4
%conv = uitofp i64 %a to float
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
ret void
@@ -165,12 +214,20 @@ entry:
define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i32
+; ELF64LE: uitofp_single_i32
; PPC970: uitofp_single_i32
%b.addr = alloca float, align 4
%conv = uitofp i32 %a to float
; ELF64: std
+; stack offset used to load the float: 65524 = -16 + 4
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwzx
; ELF64: fcfidus
+; ELF64LE: std
+; stack offset used to load the float: 65520 = -16 + 0
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwzx
+; ELF64LE: fcfidus
; PPC970-NOT: lfiwzx
; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
@@ -180,6 +237,7 @@ entry:
define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i16
+; ELF64LE: uitofp_single_i16
; PPC970: uitofp_single_i16
%b.addr = alloca float, align 4
%conv = uitofp i16 %a to float
@@ -187,6 +245,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
; PPC970: std
; PPC970: lfd
@@ -199,6 +261,7 @@ entry:
define void @uitofp_single_i8(i8 %a) nounwind ssp {
entry:
; ELF64: uitofp_single_i8
+; ELF64LE: uitofp_single_i8
; PPC970: uitofp_single_i8
%b.addr = alloca float, align 4
%conv = uitofp i8 %a to float
@@ -206,6 +269,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidus
; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
; PPC970: std
; PPC970: lfd
@@ -218,12 +285,16 @@ entry:
define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i64
+; ELF64LE: uitofp_double_i64
; PPC970: uitofp_double_i64
%b.addr = alloca double, align 8
%conv = uitofp i64 %a to double
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
ret void
@@ -232,12 +303,20 @@ entry:
define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i32
+; ELF64LE: uitofp_double_i32
; PPC970: uitofp_double_i32
%b.addr = alloca double, align 8
%conv = uitofp i32 %a to double
; ELF64: std
+; stack offset used to load the float: 65524 = -16 + 4
+; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwzx
; ELF64: fcfidu
+; ELF64LE: std
+; stack offset used to load the float: 65520 = -16 + 0
+; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
+; ELF64LE: lfiwzx
+; ELF64LE: fcfidu
; PPC970-NOT: lfiwzx
; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
@@ -247,6 +326,7 @@ entry:
define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i16
+; ELF64LE: uitofp_double_i16
; PPC970: uitofp_double_i16
%b.addr = alloca double, align 8
%conv = uitofp i16 %a to double
@@ -254,6 +334,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
; PPC970: std
; PPC970: lfd
@@ -265,6 +349,7 @@ entry:
define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i8
+; ELF64LE: uitofp_double_i8
; PPC970: uitofp_double_i8
%b.addr = alloca double, align 8
%conv = uitofp i8 %a to double
@@ -272,6 +357,10 @@ entry:
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
+; ELF64LE: std
+; ELF64LE: lfd
+; ELF64LE: fcfidu
; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
; PPC970: std
; PPC970: lfd
@@ -285,12 +374,16 @@ entry:
define void @fptosi_float_i32(float %a) nounwind ssp {
entry:
; ELF64: fptosi_float_i32
+; ELF64LE: fptosi_float_i32
; PPC970: fptosi_float_i32
%b.addr = alloca i32, align 4
%conv = fptosi float %a to i32
; ELF64: fctiwz
; ELF64: stfd
; ELF64: lwa
+; ELF64LE: fctiwz
+; ELF64LE: stfd
+; ELF64LE: lwa
; PPC970: fctiwz
; PPC970: stfd
; PPC970: lwa
@@ -301,12 +394,16 @@ entry:
define void @fptosi_float_i64(float %a) nounwind ssp {
entry:
; ELF64: fptosi_float_i64
+; ELF64LE: fptosi_float_i64
; PPC970: fptosi_float_i64
%b.addr = alloca i64, align 4
%conv = fptosi float %a to i64
; ELF64: fctidz
; ELF64: stfd
; ELF64: ld
+; ELF64LE: fctidz
+; ELF64LE: stfd
+; ELF64LE: ld
; PPC970: fctidz
; PPC970: stfd
; PPC970: ld
@@ -317,12 +414,16 @@ entry:
define void @fptosi_double_i32(double %a) nounwind ssp {
entry:
; ELF64: fptosi_double_i32
+; ELF64LE: fptosi_double_i32
; PPC970: fptosi_double_i32
%b.addr = alloca i32, align 8
%conv = fptosi double %a to i32
; ELF64: fctiwz
; ELF64: stfd
; ELF64: lwa
+; ELF64LE: fctiwz
+; ELF64LE: stfd
+; ELF64LE: lwa
; PPC970: fctiwz
; PPC970: stfd
; PPC970: lwa
@@ -333,12 +434,16 @@ entry:
define void @fptosi_double_i64(double %a) nounwind ssp {
entry:
; ELF64: fptosi_double_i64
+; ELF64LE: fptosi_double_i64
; PPC970: fptosi_double_i64
%b.addr = alloca i64, align 8
%conv = fptosi double %a to i64
; ELF64: fctidz
; ELF64: stfd
; ELF64: ld
+; ELF64LE: fctidz
+; ELF64LE: stfd
+; ELF64LE: ld
; PPC970: fctidz
; PPC970: stfd
; PPC970: ld
@@ -351,12 +456,16 @@ entry:
define void @fptoui_float_i32(float %a) nounwind ssp {
entry:
; ELF64: fptoui_float_i32
+; ELF64LE: fptoui_float_i32
; PPC970: fptoui_float_i32
%b.addr = alloca i32, align 4
%conv = fptoui float %a to i32
; ELF64: fctiwuz
; ELF64: stfd
; ELF64: lwz
+; ELF64LE: fctiwuz
+; ELF64LE: stfd
+; ELF64LE: lwz
; PPC970: fctidz
; PPC970: stfd
; PPC970: lwz
@@ -367,12 +476,16 @@ entry:
define void @fptoui_float_i64(float %a) nounwind ssp {
entry:
; ELF64: fptoui_float_i64
+; ELF64LE: fptoui_float_i64
; PPC970: fptoui_float_i64
%b.addr = alloca i64, align 4
%conv = fptoui float %a to i64
; ELF64: fctiduz
; ELF64: stfd
; ELF64: ld
+; ELF64LE: fctiduz
+; ELF64LE: stfd
+; ELF64LE: ld
; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 4
ret void
@@ -381,12 +494,16 @@ entry:
define void @fptoui_double_i32(double %a) nounwind ssp {
entry:
; ELF64: fptoui_double_i32
+; ELF64LE: fptoui_double_i32
; PPC970: fptoui_double_i32
%b.addr = alloca i32, align 8
%conv = fptoui double %a to i32
; ELF64: fctiwuz
; ELF64: stfd
; ELF64: lwz
+; ELF64LE: fctiwuz
+; ELF64LE: stfd
+; ELF64LE: lwz
; PPC970: fctidz
; PPC970: stfd
; PPC970: lwz
@@ -397,12 +514,16 @@ entry:
define void @fptoui_double_i64(double %a) nounwind ssp {
entry:
; ELF64: fptoui_double_i64
+; ELF64LE: fptoui_double_i64
; PPC970: fptoui_double_i64
%b.addr = alloca i64, align 8
%conv = fptoui double %a to i64
; ELF64: fctiduz
; ELF64: stfd
; ELF64: ld
+; ELF64LE: fctiduz
+; ELF64LE: stfd
+; ELF64LE: ld
; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 8
ret void
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
index fa19f8b11fd6..f82de70c9286 100644
--- a/test/CodeGen/PowerPC/fast-isel-ret.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -1,8 +1,40 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+define zeroext i1 @rettrue() nounwind uwtable ssp {
+entry:
+; ELF64-LABEL: rettrue
+; ELF64: li 3, 1
+; ELF64: blr
+ ret i1 true
+}
+
+define zeroext i1 @retfalse() nounwind uwtable ssp {
+entry:
+; ELF64-LABEL: retfalse
+; ELF64: li 3, 0
+; ELF64: blr
+ ret i1 false
+}
+
+define signext i1 @retstrue() nounwind uwtable ssp {
+entry:
+; ELF64-LABEL: retstrue
+; ELF64: li 3, -1
+; ELF64: blr
+ ret i1 true
+}
+
+define signext i1 @retsfalse() nounwind uwtable ssp {
+entry:
+; ELF64-LABEL: retsfalse
+; ELF64: li 3, 0
+; ELF64: blr
+ ret i1 false
+}
+
define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret2
+; ELF64-LABEL: ret2
; ELF64: extsb
; ELF64: blr
ret i8 %a
@@ -10,7 +42,7 @@ entry:
define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret3
+; ELF64-LABEL: ret3
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
; ELF64: blr
ret i8 %a
@@ -18,7 +50,7 @@ entry:
define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret4
+; ELF64-LABEL: ret4
; ELF64: extsh
; ELF64: blr
ret i16 %a
@@ -26,7 +58,7 @@ entry:
define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret5
+; ELF64-LABEL: ret5
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
; ELF64: blr
ret i16 %a
@@ -34,7 +66,7 @@ entry:
define i16 @ret6(i16 %a) nounwind uwtable ssp {
entry:
-; ELF64: ret6
+; ELF64-LABEL: ret6
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
; ELF64: blr
ret i16 %a
@@ -42,7 +74,7 @@ entry:
define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret7
+; ELF64-LABEL: ret7
; ELF64: extsw
; ELF64: blr
ret i32 %a
@@ -50,7 +82,7 @@ entry:
define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp {
entry:
-; ELF64: ret8
+; ELF64-LABEL: ret8
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
; ELF64: blr
ret i32 %a
@@ -58,7 +90,7 @@ entry:
define i32 @ret9(i32 %a) nounwind uwtable ssp {
entry:
-; ELF64: ret9
+; ELF64-LABEL: ret9
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
; ELF64: blr
ret i32 %a
@@ -66,7 +98,7 @@ entry:
define i64 @ret10(i64 %a) nounwind uwtable ssp {
entry:
-; ELF64: ret10
+; ELF64-LABEL: ret10
; ELF64-NOT: exts
; ELF64-NOT: rldicl
; ELF64: blr
@@ -75,21 +107,21 @@ entry:
define float @ret11(float %a) nounwind uwtable ssp {
entry:
-; ELF64: ret11
+; ELF64-LABEL: ret11
; ELF64: blr
ret float %a
}
define double @ret12(double %a) nounwind uwtable ssp {
entry:
-; ELF64: ret12
+; ELF64-LABEL: ret12
; ELF64: blr
ret double %a
}
define i8 @ret13() nounwind uwtable ssp {
entry:
-; ELF64: ret13
+; ELF64-LABEL: ret13
; ELF64: li
; ELF64: blr
ret i8 15;
@@ -97,7 +129,7 @@ entry:
define i16 @ret14() nounwind uwtable ssp {
entry:
-; ELF64: ret14
+; ELF64-LABEL: ret14
; ELF64: li
; ELF64: blr
ret i16 -225;
@@ -105,7 +137,7 @@ entry:
define i32 @ret15() nounwind uwtable ssp {
entry:
-; ELF64: ret15
+; ELF64-LABEL: ret15
; ELF64: lis
; ELF64: ori
; ELF64: blr
@@ -114,7 +146,7 @@ entry:
define i64 @ret16() nounwind uwtable ssp {
entry:
-; ELF64: ret16
+; ELF64-LABEL: ret16
; ELF64: li
; ELF64: sldi
; ELF64: oris
@@ -125,7 +157,7 @@ entry:
define float @ret17() nounwind uwtable ssp {
entry:
-; ELF64: ret17
+; ELF64-LABEL: ret17
; ELF64: addis
; ELF64: lfs
; ELF64: blr
@@ -134,7 +166,7 @@ entry:
define double @ret18() nounwind uwtable ssp {
entry:
-; ELF64: ret18
+; ELF64-LABEL: ret18
; ELF64: addis
; ELF64: lfd
; ELF64: blr
diff --git a/test/CodeGen/PowerPC/ia-mem-r0.ll b/test/CodeGen/PowerPC/ia-mem-r0.ll
new file mode 100644
index 000000000000..4ab17edc5b10
--- /dev/null
+++ b/test/CodeGen/PowerPC/ia-mem-r0.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Make sure that we don't generate a std r, 0(0) -- the memory address cannot
+; be stored in r0.
+; CHECK-LABEL: @test1
+; CHECK-NOT: std {{[0-9]+}}, 0(0)
+; CHECK: blr
+
+define void @test1({ i8*, void (i8*, i8*)* } %fn_arg) {
+ %fn = alloca { i8*, void (i8*, i8*)* }
+ %sp = alloca i8*, align 8
+ %regs = alloca [18 x i64], align 8
+ store { i8*, void (i8*, i8*)* } %fn_arg, { i8*, void (i8*, i8*)* }* %fn
+ %1 = bitcast [18 x i64]* %regs to i64*
+ call void asm sideeffect "std 14, $0", "=*m"(i64* %1)
+ %2 = bitcast [18 x i64]* %regs to i8*
+ %3 = getelementptr i8* %2, i32 8
+ %4 = bitcast i8* %3 to i64*
+ call void asm sideeffect "std 15, $0", "=*m"(i64* %4)
+ %5 = bitcast [18 x i64]* %regs to i8*
+ %6 = getelementptr i8* %5, i32 16
+ %7 = bitcast i8* %6 to i64*
+ call void asm sideeffect "std 16, $0", "=*m"(i64* %7)
+ %8 = bitcast [18 x i64]* %regs to i8*
+ %9 = getelementptr i8* %8, i32 24
+ %10 = bitcast i8* %9 to i64*
+ call void asm sideeffect "std 17, $0", "=*m"(i64* %10)
+ %11 = bitcast [18 x i64]* %regs to i8*
+ %12 = getelementptr i8* %11, i32 32
+ %13 = bitcast i8* %12 to i64*
+ call void asm sideeffect "std 18, $0", "=*m"(i64* %13)
+ %14 = bitcast [18 x i64]* %regs to i8*
+ %15 = getelementptr i8* %14, i32 40
+ %16 = bitcast i8* %15 to i64*
+ call void asm sideeffect "std 19, $0", "=*m"(i64* %16)
+ %17 = bitcast [18 x i64]* %regs to i8*
+ %18 = getelementptr i8* %17, i32 48
+ %19 = bitcast i8* %18 to i64*
+ call void asm sideeffect "std 20, $0", "=*m"(i64* %19)
+ %20 = bitcast [18 x i64]* %regs to i8*
+ %21 = getelementptr i8* %20, i32 56
+ %22 = bitcast i8* %21 to i64*
+ call void asm sideeffect "std 21, $0", "=*m"(i64* %22)
+ %23 = bitcast [18 x i64]* %regs to i8*
+ %24 = getelementptr i8* %23, i32 64
+ %25 = bitcast i8* %24 to i64*
+ call void asm sideeffect "std 22, $0", "=*m"(i64* %25)
+ %26 = bitcast [18 x i64]* %regs to i8*
+ %27 = getelementptr i8* %26, i32 72
+ %28 = bitcast i8* %27 to i64*
+ call void asm sideeffect "std 23, $0", "=*m"(i64* %28)
+ %29 = bitcast [18 x i64]* %regs to i8*
+ %30 = getelementptr i8* %29, i32 80
+ %31 = bitcast i8* %30 to i64*
+ call void asm sideeffect "std 24, $0", "=*m"(i64* %31)
+ %32 = bitcast [18 x i64]* %regs to i8*
+ %33 = getelementptr i8* %32, i32 88
+ %34 = bitcast i8* %33 to i64*
+ call void asm sideeffect "std 25, $0", "=*m"(i64* %34)
+ %35 = bitcast [18 x i64]* %regs to i8*
+ %36 = getelementptr i8* %35, i32 96
+ %37 = bitcast i8* %36 to i64*
+ call void asm sideeffect "std 26, $0", "=*m"(i64* %37)
+ %38 = bitcast [18 x i64]* %regs to i8*
+ %39 = getelementptr i8* %38, i32 104
+ %40 = bitcast i8* %39 to i64*
+ call void asm sideeffect "std 27, $0", "=*m"(i64* %40)
+ %41 = bitcast [18 x i64]* %regs to i8*
+ %42 = getelementptr i8* %41, i32 112
+ %43 = bitcast i8* %42 to i64*
+ call void asm sideeffect "std 28, $0", "=*m"(i64* %43)
+ %44 = bitcast [18 x i64]* %regs to i8*
+ %45 = getelementptr i8* %44, i32 120
+ %46 = bitcast i8* %45 to i64*
+ call void asm sideeffect "std 29, $0", "=*m"(i64* %46)
+ %47 = bitcast [18 x i64]* %regs to i8*
+ %48 = getelementptr i8* %47, i32 128
+ %49 = bitcast i8* %48 to i64*
+ call void asm sideeffect "std 30, $0", "=*m"(i64* %49)
+ %50 = bitcast [18 x i64]* %regs to i8*
+ %51 = getelementptr i8* %50, i32 136
+ %52 = bitcast i8* %51 to i64*
+ call void asm sideeffect "std 31, $0", "=*m"(i64* %52)
+ %53 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 1
+ %.funcptr = load void (i8*, i8*)** %53
+ %54 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 0
+ %.ptr = load i8** %54
+ %55 = load i8** %sp
+ call void %.funcptr(i8* %.ptr, i8* %55)
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/ia-neg-const.ll b/test/CodeGen/PowerPC/ia-neg-const.ll
new file mode 100644
index 000000000000..165fc1339d0b
--- /dev/null
+++ b/test/CodeGen/PowerPC/ia-neg-const.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+
+; Function Attrs: nounwind
+define i64 @main() #0 {
+entry:
+ %x = alloca i64, align 8
+ store i64 0, i64* %x, align 8
+ %0 = call i64 asm sideeffect "ld $0,$1\0A\09add${2:I} $0,$0,$2", "=&r,*m,Ir"(i64* %x, i64 -1) #0
+ ret i64 %0
+}
+
+; CHECK: ld
+; CHECK-NOT: addi 3,3,4294967295
+; CHECK: addi 3,3,-1
+; CHECK: blr
+
+; Function Attrs: nounwind
+declare signext i32 @printf(i8* nocapture readonly, ...) #0
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll
index a59fceb5bdd0..762f50a9cbe0 100644
--- a/test/CodeGen/PowerPC/stack-realign.ll
+++ b/test/CodeGen/PowerPC/stack-realign.ll
@@ -37,6 +37,7 @@ entry:
; CHECK-DAG: subfic 0, [[REG]], -160
; CHECK: stdux 1, 1, 0
+; CHECK: .cfi_def_cfa_register r30
; CHECK: .cfi_offset r30, -16
; CHECK: .cfi_offset lr, 16
@@ -59,6 +60,7 @@ entry:
; CHECK-FP-DAG: subfic 0, [[REG]], -160
; CHECK-FP: stdux 1, 1, 0
+; CHECK-FP: .cfi_def_cfa_register r30
; CHECK-FP: .cfi_offset r31, -8
; CHECK-FP: .cfi_offset r30, -16
; CHECK-FP: .cfi_offset lr, 16
@@ -120,6 +122,8 @@ entry:
; CHECK-DAG: subfc 0, [[REG3]], [[REG2]]
; CHECK: stdux 1, 1, 0
+; CHECK: .cfi_def_cfa_register r30
+
; CHECK: blr
; CHECK-32-LABEL: @hoo
@@ -178,6 +182,8 @@ entry:
; CHECK-DAG: subfic 0, [[REG]], -192
; CHECK: stdux 1, 1, 0
+; CHECK: .cfi_def_cfa_register r30
+
; CHECK: stfd 30, -16(30)
; CHECK: blr
@@ -193,6 +199,8 @@ entry:
; CHECK-FP-DAG: subfic 0, [[REG]], -192
; CHECK-FP: stdux 1, 1, 0
+; CHECK-FP: .cfi_def_cfa_register r30
+
; CHECK-FP: stfd 30, -16(30)
; CHECK-FP: blr
diff --git a/test/CodeGen/PowerPC/subreg-postra-2.ll b/test/CodeGen/PowerPC/subreg-postra-2.ll
new file mode 100644
index 000000000000..2faaa6129294
--- /dev/null
+++ b/test/CodeGen/PowerPC/subreg-postra-2.ll
@@ -0,0 +1,175 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @jbd2_journal_commit_transaction() #0 {
+entry:
+ br i1 undef, label %do.body, label %if.then5
+
+if.then5: ; preds = %entry
+ unreachable
+
+do.body: ; preds = %entry
+ br i1 undef, label %do.body.i, label %trace_jbd2_start_commit.exit
+
+do.body.i: ; preds = %do.body
+ unreachable
+
+trace_jbd2_start_commit.exit: ; preds = %do.body
+ br i1 undef, label %do.body.i1116, label %trace_jbd2_commit_locking.exit
+
+do.body.i1116: ; preds = %trace_jbd2_start_commit.exit
+ unreachable
+
+trace_jbd2_commit_locking.exit: ; preds = %trace_jbd2_start_commit.exit
+ br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph: ; preds = %trace_jbd2_commit_locking.exit
+ unreachable
+
+while.end: ; preds = %trace_jbd2_commit_locking.exit
+ br i1 undef, label %spin_unlock.exit1146, label %if.then.i.i.i.i1144
+
+if.then.i.i.i.i1144: ; preds = %while.end
+ unreachable
+
+spin_unlock.exit1146: ; preds = %while.end
+ br i1 undef, label %spin_unlock.exit1154, label %if.then.i.i.i.i1152
+
+if.then.i.i.i.i1152: ; preds = %spin_unlock.exit1146
+ unreachable
+
+spin_unlock.exit1154: ; preds = %spin_unlock.exit1146
+ br i1 undef, label %do.body.i1159, label %trace_jbd2_commit_flushing.exit
+
+do.body.i1159: ; preds = %spin_unlock.exit1154
+ br i1 undef, label %if.end.i1166, label %do.body5.i1165
+
+do.body5.i1165: ; preds = %do.body.i1159
+ unreachable
+
+if.end.i1166: ; preds = %do.body.i1159
+ unreachable
+
+trace_jbd2_commit_flushing.exit: ; preds = %spin_unlock.exit1154
+ br i1 undef, label %for.end.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i: ; preds = %trace_jbd2_commit_flushing.exit
+ unreachable
+
+for.end.i: ; preds = %trace_jbd2_commit_flushing.exit
+ br i1 undef, label %journal_submit_data_buffers.exit, label %if.then.i.i.i.i31.i
+
+if.then.i.i.i.i31.i: ; preds = %for.end.i
+ br label %journal_submit_data_buffers.exit
+
+journal_submit_data_buffers.exit: ; preds = %if.then.i.i.i.i31.i, %for.end.i
+ br i1 undef, label %if.end103, label %if.then102
+
+if.then102: ; preds = %journal_submit_data_buffers.exit
+ unreachable
+
+if.end103: ; preds = %journal_submit_data_buffers.exit
+ br i1 undef, label %do.body.i1182, label %trace_jbd2_commit_logging.exit
+
+do.body.i1182: ; preds = %if.end103
+ br i1 undef, label %if.end.i1189, label %do.body5.i1188
+
+do.body5.i1188: ; preds = %do.body5.i1188, %do.body.i1182
+ br i1 undef, label %if.end.i1189, label %do.body5.i1188
+
+if.end.i1189: ; preds = %do.body5.i1188, %do.body.i1182
+ unreachable
+
+trace_jbd2_commit_logging.exit: ; preds = %if.end103
+ br label %while.cond129.outer1451
+
+while.cond129.outer1451: ; preds = %start_journal_io, %trace_jbd2_commit_logging.exit
+ br label %while.cond129
+
+while.cond129: ; preds = %if.then135, %while.cond129.outer1451
+ br i1 undef, label %while.end246, label %if.then135
+
+if.then135: ; preds = %while.cond129
+ br i1 undef, label %start_journal_io, label %while.cond129
+
+start_journal_io: ; preds = %if.then135
+ br label %while.cond129.outer1451
+
+while.end246: ; preds = %while.cond129
+ br i1 undef, label %for.end.i1287, label %for.body.i1277
+
+for.body.i1277: ; preds = %while.end246
+ unreachable
+
+for.end.i1287: ; preds = %while.end246
+ br i1 undef, label %journal_finish_inode_data_buffers.exit, label %if.then.i.i.i.i84.i
+
+if.then.i.i.i.i84.i: ; preds = %for.end.i1287
+ unreachable
+
+journal_finish_inode_data_buffers.exit: ; preds = %for.end.i1287
+ br i1 undef, label %if.end256, label %if.then249
+
+if.then249: ; preds = %journal_finish_inode_data_buffers.exit
+ unreachable
+
+if.end256: ; preds = %journal_finish_inode_data_buffers.exit
+ br label %while.body318
+
+while.body318: ; preds = %wait_on_buffer.exit, %if.end256
+ br i1 undef, label %wait_on_buffer.exit, label %if.then.i1296
+
+if.then.i1296: ; preds = %while.body318
+ br label %wait_on_buffer.exit
+
+wait_on_buffer.exit: ; preds = %if.then.i1296, %while.body318
+ br i1 undef, label %do.body378, label %while.body318
+
+do.body378: ; preds = %wait_on_buffer.exit
+ br i1 undef, label %while.end418, label %while.body392.lr.ph
+
+while.body392.lr.ph: ; preds = %do.body378
+ br label %while.body392
+
+while.body392: ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
+ %0 = load i8** undef, align 8
+ %add.ptr399 = getelementptr inbounds i8* %0, i64 -72
+ %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*
+ %tobool.i1316 = icmp eq i64 undef, 0
+ br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317
+
+if.then.i1317: ; preds = %while.body392
+ unreachable
+
+wait_on_buffer.exit1319: ; preds = %while.body392
+ %1 = load volatile i64* %b_state.i.i1314, align 8
+ %conv.i.i1322 = and i64 %1, 1
+ %lnot404 = icmp eq i64 %conv.i.i1322, 0
+ %.err.4 = select i1 %lnot404, i32 -5, i32 undef
+ %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #0
+ store i8* %0, i8** undef, align 8
+ %cmp.i1312 = icmp eq i32* undef, undef
+ br i1 %cmp.i1312, label %while.end418, label %while.body392
+
+while.end418: ; preds = %wait_on_buffer.exit1319, %do.body378
+ %err.4.lcssa = phi i32 [ undef, %do.body378 ], [ %.err.4, %wait_on_buffer.exit1319 ]
+ %tobool419 = icmp eq i32 %err.4.lcssa, 0
+ br i1 %tobool419, label %if.end421, label %if.then420
+
+; CHECK-LABEL: @jbd2_journal_commit_transaction
+; CHECK: andi.
+; CHECK: cror [[REG:[0-9]+]], 1, 1
+; CHECK: stdcx.
+; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]]
+
+if.then420: ; preds = %while.end418
+ unreachable
+
+if.end421: ; preds = %while.end418
+ unreachable
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/subreg-postra.ll b/test/CodeGen/PowerPC/subreg-postra.ll
new file mode 100644
index 000000000000..b10fa668cb8d
--- /dev/null
+++ b/test/CodeGen/PowerPC/subreg-postra.ll
@@ -0,0 +1,168 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @jbd2_journal_commit_transaction(i32* %journal) #0 {
+entry:
+ br i1 undef, label %do.body, label %if.then5
+
+if.then5: ; preds = %entry
+ unreachable
+
+do.body: ; preds = %entry
+ br i1 undef, label %do.body.i, label %trace_jbd2_start_commit.exit
+
+do.body.i: ; preds = %do.body
+ unreachable
+
+trace_jbd2_start_commit.exit: ; preds = %do.body
+ br i1 undef, label %do.body.i1116, label %trace_jbd2_commit_locking.exit
+
+do.body.i1116: ; preds = %trace_jbd2_start_commit.exit
+ br i1 undef, label %if.end.i1123, label %do.body5.i1122
+
+do.body5.i1122: ; preds = %do.body.i1116
+ unreachable
+
+if.end.i1123: ; preds = %do.body.i1116
+ br label %trace_jbd2_commit_locking.exit
+
+trace_jbd2_commit_locking.exit: ; preds = %if.end.i1123, %trace_jbd2_start_commit.exit
+ br i1 undef, label %spin_unlock.exit1146, label %if.then.i.i.i.i1144
+
+if.then.i.i.i.i1144: ; preds = %trace_jbd2_commit_locking.exit
+ unreachable
+
+spin_unlock.exit1146: ; preds = %trace_jbd2_commit_locking.exit
+ br i1 undef, label %spin_unlock.exit1154, label %if.then.i.i.i.i1152
+
+if.then.i.i.i.i1152: ; preds = %spin_unlock.exit1146
+ br label %spin_unlock.exit1154
+
+spin_unlock.exit1154: ; preds = %if.then.i.i.i.i1152, %spin_unlock.exit1146
+ br i1 undef, label %do.body.i1159, label %trace_jbd2_commit_flushing.exit
+
+do.body.i1159: ; preds = %spin_unlock.exit1154
+ unreachable
+
+trace_jbd2_commit_flushing.exit: ; preds = %spin_unlock.exit1154
+ br i1 undef, label %for.end.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i: ; preds = %trace_jbd2_commit_flushing.exit
+ br i1 undef, label %spin_unlock.exit.i, label %if.then.i.i.i.i.i
+
+if.then.i.i.i.i.i: ; preds = %for.body.lr.ph.i
+ unreachable
+
+spin_unlock.exit.i: ; preds = %for.body.lr.ph.i
+ unreachable
+
+for.end.i: ; preds = %trace_jbd2_commit_flushing.exit
+ br i1 undef, label %journal_submit_data_buffers.exit, label %if.then.i.i.i.i31.i
+
+if.then.i.i.i.i31.i: ; preds = %for.end.i
+ unreachable
+
+journal_submit_data_buffers.exit: ; preds = %for.end.i
+ br i1 undef, label %if.end103, label %if.then102
+
+if.then102: ; preds = %journal_submit_data_buffers.exit
+ unreachable
+
+if.end103: ; preds = %journal_submit_data_buffers.exit
+ br i1 undef, label %do.body.i1182, label %trace_jbd2_commit_logging.exit
+
+do.body.i1182: ; preds = %if.end103
+ unreachable
+
+trace_jbd2_commit_logging.exit: ; preds = %if.end103
+ br i1 undef, label %for.end.i1287, label %for.body.i1277
+
+for.body.i1277: ; preds = %trace_jbd2_commit_logging.exit
+ unreachable
+
+for.end.i1287: ; preds = %trace_jbd2_commit_logging.exit
+ br i1 undef, label %journal_finish_inode_data_buffers.exit, label %if.then.i.i.i.i84.i
+
+if.then.i.i.i.i84.i: ; preds = %for.end.i1287
+ unreachable
+
+journal_finish_inode_data_buffers.exit: ; preds = %for.end.i1287
+ br i1 undef, label %if.end256, label %if.then249
+
+if.then249: ; preds = %journal_finish_inode_data_buffers.exit
+ unreachable
+
+if.end256: ; preds = %journal_finish_inode_data_buffers.exit
+ br i1 undef, label %do.body277, label %if.then260
+
+if.then260: ; preds = %if.end256
+ br label %do.body277
+
+do.body277: ; preds = %if.then260, %if.end256
+ br label %while.body318
+
+while.body318: ; preds = %wait_on_buffer.exit, %do.body277
+ %tobool.i1295 = icmp eq i64 undef, 0
+ br i1 %tobool.i1295, label %wait_on_buffer.exit, label %if.then.i1296
+
+if.then.i1296: ; preds = %while.body318
+ unreachable
+
+wait_on_buffer.exit: ; preds = %while.body318
+ br i1 undef, label %do.body378, label %while.body318
+
+do.body378: ; preds = %wait_on_buffer.exit
+ br i1 undef, label %while.end418, label %while.body392.lr.ph
+
+while.body392.lr.ph: ; preds = %do.body378
+ br label %while.body392
+
+while.body392: ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
+ %0 = load i8** undef, align 8
+ %add.ptr399 = getelementptr inbounds i8* %0, i64 -72
+ %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*
+ %tobool.i1316 = icmp eq i64 undef, 0
+ br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317
+
+if.then.i1317: ; preds = %while.body392
+ unreachable
+
+wait_on_buffer.exit1319: ; preds = %while.body392
+ %1 = load volatile i64* %b_state.i.i1314, align 8
+ %conv.i.i1322 = and i64 %1, 1
+ %lnot404 = icmp eq i64 %conv.i.i1322, 0
+ %.err.4 = select i1 %lnot404, i32 -5, i32 undef
+ %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #1
+ %prev.i.i.i1325 = getelementptr inbounds i8* %0, i64 8
+ %3 = load i32** null, align 8
+ store i32* %3, i32** undef, align 8
+ call void @__brelse(i32* undef) #1
+ br i1 undef, label %while.end418, label %while.body392
+
+; CHECK-LABEL: @jbd2_journal_commit_transaction
+; CHECK: andi.
+; CHECK: cror [[REG:[0-9]+]], 1, 1
+; CHECK: stdcx.
+; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]]
+
+while.end418: ; preds = %wait_on_buffer.exit1319, %do.body378
+ %err.4.lcssa = phi i32 [ undef, %do.body378 ], [ %.err.4, %wait_on_buffer.exit1319 ]
+ br i1 undef, label %if.end421, label %if.then420
+
+if.then420: ; preds = %while.end418
+ call void @jbd2_journal_abort(i32* %journal, i32 signext %err.4.lcssa) #1
+ br label %if.end421
+
+if.end421: ; preds = %if.then420, %while.end418
+ unreachable
+}
+
+declare void @jbd2_journal_abort(i32*, i32 signext)
+
+declare void @__brelse(i32*)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/tls-pic.ll b/test/CodeGen/PowerPC/tls-pic.ll
index 9f3ab6e3b491..9ba372591e6e 100644
--- a/test/CodeGen/PowerPC/tls-pic.ll
+++ b/test/CodeGen/PowerPC/tls-pic.ll
@@ -1,5 +1,7 @@
; RUN: llc -march=ppc64 -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0 %s
; RUN: llc -march=ppc64 -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1 %s
+; RUN: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s
+; RUN: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s
target triple = "powerpc64-unknown-linux-gnu"
; Test correct assembly code generation for thread-local storage using
@@ -22,6 +24,16 @@ entry:
; OPT0-NEXT: nop
; OPT0: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
; OPT0-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
+; OPT0-32-LABEL: main
+; OPT0-32: addi {{[0-9]+}}, {{[0-9]+}}, a@got@tlsld
+; OPT0-32: bl __tls_get_addr(a@tlsld)@PLT
+; OPT0-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha
+; OPT0-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l
+; OPT1-32-LABEL: main
+; OPT1-32: addi 3, {{[0-9]+}}, a@got@tlsld
+; OPT1-32: bl __tls_get_addr(a@tlsld)@PLT
+; OPT1-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha
+; OPT1-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l
; Test peephole optimization for thread-local storage using the
; local dynamic model.
@@ -52,4 +64,6 @@ entry:
; OPT1-NEXT: addi 3, [[REG]], a2@got@tlsgd@l
; OPT1: bl __tls_get_addr(a2@tlsgd)
; OPT1-NEXT: nop
-
+; OPT1-32-LABEL: main2
+; OPT1-32: addi 3, {{[0-9]+}}, a2@got@tlsgd
+; OPT1-32: bl __tls_get_addr(a2@tlsgd)@PLT
diff --git a/test/CodeGen/PowerPC/tls-store2.ll b/test/CodeGen/PowerPC/tls-store2.ll
new file mode 100644
index 000000000000..f884dd8a0a17
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-store2.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Test back-to-back stores of TLS variables to ensure call sequences no
+; longer overlap.
+
+@__once_callable = external thread_local global i8**
+@__once_call = external thread_local global void ()*
+
+define i64 @call_once(i64 %flag, i8* %ptr) {
+entry:
+ %var = alloca i8*, align 8
+ store i8* %ptr, i8** %var, align 8
+ store i8** %var, i8*** @__once_callable, align 8
+ store void ()* @__once_call_impl, void ()** @__once_call, align 8
+ ret i64 %flag
+}
+
+; CHECK-LABEL: call_once:
+; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha
+; CHECK: addi 3, 3, __once_callable@got@tlsgd@l
+; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
+; CHECK-NEXT: nop
+; CHECK: std {{[0-9]+}}, 0(3)
+; CHECK: addis 3, 2, __once_call@got@tlsgd@ha
+; CHECK: addi 3, 3, __once_call@got@tlsgd@l
+; CHECK: bl __tls_get_addr(__once_call@tlsgd)
+; CHECK-NEXT: nop
+; CHECK: std {{[0-9]+}}, 0(3)
+
+declare void @__once_call_impl()
diff --git a/test/MC/Disassembler/Mips/mips2.txt b/test/MC/Disassembler/Mips/mips2.txt
new file mode 100644
index 000000000000..a604055e62ef
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips2.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips2 | FileCheck %s
+
+# CHECK: sdc3 $5, 9154($6)
+0xfc 0xc5 0x23 0xc2
+
+# CHECK: swc3 $6, 9158($7)
+0xec 0xe6 0x23 0xc6
+
+# CHECK: ldc3 $7, 9162($8)
+0xdd 0x07 0x23 0xca
+
+# CHECK: lwc3 $8, 9166($9)
+0xcd 0x28 0x23 0xce
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
index bfb145e39596..bd4ae4daad04 100644
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32.txt
@@ -1,4 +1,5 @@
# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
+
# CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x05
@@ -436,3 +437,15 @@
# CHECK: rdhwr $5, $29
# CHECK: .set pop
0x7c 0x05 0xe8 0x3b
+
+# CHECK: cache 1, 2($3)
+0xbc 0x61 0x00 0x02
+
+# CHECK: pref 3, 4($2)
+0xcc 0x43 0x00 0x04
+
+# CHECK: swc2 $9, 9158($7)
+0xe8 0xe9 0x23 0xc6
+
+# CHECK: lwc2 $8, 9162($6)
+0xc8 0xc8 0x23 0xca
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index f3d2d100cae3..d494df6f9c24 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -85,3 +85,9 @@
# CHECK: sdxc1 $f8, $4($25)
0x4f 0x24 0x40 0x09
+
+# CHECK: sdc2 $9, 9158($7)
+0xf8 0xe9 0x23 0xc6
+
+# CHECK: ldc2 $3, 9162($8)
+0xd9 0x03 0x23 0xca
diff --git a/test/MC/PowerPC/ppc64-localentry.s b/test/MC/PowerPC/ppc64-localentry.s
index 6d2c12072289..03f760ef86ea 100644
--- a/test/MC/PowerPC/ppc64-localentry.s
+++ b/test/MC/PowerPC/ppc64-localentry.s
@@ -35,6 +35,9 @@ caller_other:
nop
.size caller_other, .-caller_other
+copy1 = callee1
+copy2 = callee2
+
# Verify that use of .localentry implies ABI version 2
# CHECK: ElfHeader {
# CHECK: Flags [ (0x2)
@@ -68,3 +71,19 @@ caller_other:
# CHECK-NEXT: Other: 0
# CHECK-NEXT: Section: .text
+# Verify that symbol assignment copies the Other bits.
+# CHECK: Name: copy1
+# CHECK-NEXT: Value:
+# CHECK-NEXT: Size: 16
+# CHECK-NEXT: Binding: Local
+# CHECK-NEXT: Type: Function
+# CHECK-NEXT: Other: 96
+# CHECK-NEXT: Section: .text
+# CHECK: Name: copy2
+# CHECK-NEXT: Value:
+# CHECK-NEXT: Size: 8
+# CHECK-NEXT: Binding: Local
+# CHECK-NEXT: Type: Function
+# CHECK-NEXT: Other: 0
+# CHECK-NEXT: Section: .text
+
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
index e2f832498428..ba1c563c64ab 100644
--- a/test/Transforms/GCOVProfiling/linezero.ll
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -1,4 +1,4 @@
-; RUN: sed -e 's@PATTERN@\%T@g' < %s > %t1
+; RUN: sed -e 's|PATTERN|%T|g' < %s > %t1
; RUN: opt -insert-gcov-profiling -disable-output < %t1
; RUN: rm %T/linezero.gcno %t1
; REQUIRES: shell
diff --git a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
index a8020e6014b0..e462712fe7a5 100644
--- a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
+++ b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@@ -6,7 +6,7 @@ target triple = "thumbv7-apple-darwin"
; CHECK-LABEL: @test(
; CHECK: if.end.i126:
-; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, getelementptr (i8* null, i32 undef)
+; CHECK: %exitcond = icmp ne i8* %destYPixelPtr.010.i, getelementptr (i8* null, i32 undef)
define void @test() nounwind {
entry:
br label %while.cond
diff --git a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
index e4c31d125c60..9e55a1791341 100644
--- a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@ entry:
br i1 %cmp1, label %for.body, label %for.end
; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8
; CHECK: for.body:
; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@ entry:
br i1 %cmp1, label %for.body, label %for.end
; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16
; CHECK: for.body:
; CHECK: phi i8 addrspace(3)*
diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
index 4736f857bcce..eeffd0f55024 100644
--- a/test/Transforms/IndVarSimplify/lftr-extend-const.ll
+++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
@@ -2,7 +2,7 @@
; CHECK-LABEL: @foo(
; CHECK-NOT: %lftr.wideiv = trunc i32 %indvars.iv.next to i16
-; CHECK: %exitcond = icmp ne i32 %indvars.iv.next, 512
+; CHECK: %exitcond = icmp ne i32 %indvars.iv, 511
define void @foo() #0 {
entry:
br label %for.body
@@ -21,7 +21,7 @@ for.end: ; preds = %for.body
; Check that post-incrementing the backedge taken count does not overflow.
; CHECK-LABEL: @postinc(
-; CHECK: icmp eq i32 %indvars.iv.next, 256
+; CHECK: icmp eq i32 %indvars.iv, 255
define i32 @postinc() #0 {
entry:
br label %do.body
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 1fdcdd1ec3a4..efb96bdbe6b5 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -82,15 +82,23 @@ exit:
; Perform LFTR without generating extra preheader code.
define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
i32 %irow, i32 %ilead) nounwind {
-; CHECK: entry:
-; CHECK-NOT: zext
-; CHECK-NOT: add
-; CHECK: loop:
-; CHECK: phi i64
-; CHECK: phi i64
+; CHECK-LABEL: @guardedloop(
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %[[cmp:.*]] = icmp slt i32 1, %irow
+; CHECK-NEXT: br i1 %[[cmp]], label %[[loop_preheader:.*]], label %[[return:.*]]
+
+; CHECK: [[loop_preheader]]:
+; CHECK-NEXT: %[[sext:.*]] = sext i32 %ilead to i64
+; CHECK-NEXT: %[[add:.*]] = add i32 %irow, -1
+; CHECK-NEXT: br label %[[loop:.*]]
+
+; CHECK: [[loop]]:
+; CHECK-NEXT: %[[indvars_iv2:.*]] = phi i64
+; CHECK-NEXT: phi i64
; CHECK-NOT: phi
-; CHECK: icmp ne
-; CHECK: br i1
+; CHECK: %[[lftr_wideiv:.*]] = trunc i64 %[[indvars_iv2]] to i32
+; CHECK-NEXT: %[[exitcond:.*]] = icmp ne i32 %[[lftr_wideiv]], %[[add]]
+; CHECK-NEXT: br i1 %[[exitcond]], label %[[loop]], label
entry:
%cmp = icmp slt i32 1, %irow
br i1 %cmp, label %loop, label %return
diff --git a/test/Transforms/IndVarSimplify/pr20680.ll b/test/Transforms/IndVarSimplify/pr20680.ll
new file mode 100644
index 000000000000..88a7fd765d0b
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr20680.ll
@@ -0,0 +1,219 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+define void @f() {
+; CHECK-LABEL: @f(
+; CHECK-LABEL: entry:
+; CHECK: br label %[[for_cond2_preheader:.*]]
+
+; CHECK: [[for_cond2_preheader]]:
+; CHECK-NEXT: %[[indvars_iv:.*]] = phi i32 [ %[[indvars_iv_next:.*]], %[[for_inc13:.*]] ], [ -14, %entry ]
+; br i1 {{.*}}, label %[[for_inc13]], label %
+entry:
+ %0 = load i32* @a, align 4
+ %tobool2 = icmp eq i32 %0, 0
+ %1 = load i32* @a, align 4
+ %tobool = icmp eq i32 %1, 0
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %for.inc13, %entry
+ %storemerge15 = phi i8 [ -14, %entry ], [ %inc14, %for.inc13 ]
+ br i1 %tobool2, label %for.inc13, label %for.body3.lr.ph
+
+for.body3.lr.ph: ; preds = %for.cond2.preheader
+ %tobool5 = icmp eq i8 %storemerge15, 0
+ %conv7 = sext i8 %storemerge15 to i32
+ %2 = add nsw i32 %conv7, 1
+ %3 = icmp ult i32 %2, 3
+ %div = select i1 %3, i32 %conv7, i32 0
+ br i1 %tobool5, label %for.body3.lr.ph.split.us, label %for.body3.lr.ph.for.body3.lr.ph.split_crit_edge
+
+for.body3.lr.ph.for.body3.lr.ph.split_crit_edge: ; preds = %for.body3.lr.ph
+ br label %for.body3.lr.ph.split
+
+for.body3.lr.ph.split.us: ; preds = %for.body3.lr.ph
+ br i1 %tobool, label %for.body3.lr.ph.split.us.split.us, label %for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge
+
+for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge: ; preds = %for.body3.lr.ph.split.us
+ br label %for.body3.lr.ph.split.us.split
+
+for.body3.lr.ph.split.us.split.us: ; preds = %for.body3.lr.ph.split.us
+ br label %for.body3.us.us
+
+for.body3.us.us: ; preds = %for.cond2.loopexit.us.us, %for.body3.lr.ph.split.us.split.us
+ br i1 true, label %cond.false.us.us, label %cond.end.us.us
+
+cond.false.us.us: ; preds = %for.body3.us.us
+ br label %cond.end.us.us
+
+cond.end.us.us: ; preds = %cond.false.us.us, %for.body3.us.us
+ %cond.us.us = phi i32 [ %div, %cond.false.us.us ], [ %conv7, %for.body3.us.us ]
+ %4 = load i32* @b, align 4
+ %cmp91.us.us = icmp slt i32 %4, 1
+ br i1 %cmp91.us.us, label %for.inc.lr.ph.us.us, label %for.cond2.loopexit.us.us
+
+for.cond2.loopexit.us.us: ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us.us, %cond.end.us.us
+ br i1 true, label %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us, label %for.body3.us.us
+
+for.inc.lr.ph.us.us: ; preds = %cond.end.us.us
+ br label %for.inc.us.us
+
+for.cond8.for.cond2.loopexit_crit_edge.us.us: ; preds = %for.inc.us.us
+ %inc.lcssa.us.us = phi i32 [ %inc.us.us, %for.inc.us.us ]
+ store i32 %inc.lcssa.us.us, i32* @b, align 4
+ br label %for.cond2.loopexit.us.us
+
+for.inc.us.us: ; preds = %for.inc.us.us, %for.inc.lr.ph.us.us
+ %5 = phi i32 [ %4, %for.inc.lr.ph.us.us ], [ %inc.us.us, %for.inc.us.us ]
+ %inc.us.us = add nsw i32 %5, 1
+ %cmp9.us.us = icmp slt i32 %inc.us.us, 1
+ br i1 %cmp9.us.us, label %for.inc.us.us, label %for.cond8.for.cond2.loopexit_crit_edge.us.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us: ; preds = %for.cond2.loopexit.us.us
+ %cond.lcssa.ph.us.ph.us = phi i32 [ %cond.us.us, %for.cond2.loopexit.us.us ]
+ br label %for.cond2.for.inc13_crit_edge.us-lcssa.us
+
+for.body3.lr.ph.split.us.split: ; preds = %for.body3.lr.ph.split.us.for.body3.lr.ph.split.us.split_crit_edge
+ br label %for.body3.us
+
+for.body3.us: ; preds = %for.cond2.loopexit.us, %for.body3.lr.ph.split.us.split
+ br i1 true, label %cond.false.us, label %cond.end.us
+
+cond.false.us: ; preds = %for.body3.us
+ br label %cond.end.us
+
+cond.end.us: ; preds = %cond.false.us, %for.body3.us
+ %cond.us = phi i32 [ %div, %cond.false.us ], [ %conv7, %for.body3.us ]
+ %6 = load i32* @b, align 4
+ %cmp91.us = icmp slt i32 %6, 1
+ br i1 %cmp91.us, label %for.inc.lr.ph.us, label %for.cond2.loopexit.us
+
+for.inc.us: ; preds = %for.inc.lr.ph.us, %for.inc.us
+ %7 = phi i32 [ %6, %for.inc.lr.ph.us ], [ %inc.us, %for.inc.us ]
+ %inc.us = add nsw i32 %7, 1
+ %cmp9.us = icmp slt i32 %inc.us, 1
+ br i1 %cmp9.us, label %for.inc.us, label %for.cond8.for.cond2.loopexit_crit_edge.us
+
+for.cond2.loopexit.us: ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us, %cond.end.us
+ br i1 false, label %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa, label %for.body3.us
+
+for.inc.lr.ph.us: ; preds = %cond.end.us
+ br label %for.inc.us
+
+for.cond8.for.cond2.loopexit_crit_edge.us: ; preds = %for.inc.us
+ %inc.lcssa.us = phi i32 [ %inc.us, %for.inc.us ]
+ store i32 %inc.lcssa.us, i32* @b, align 4
+ br label %for.cond2.loopexit.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa: ; preds = %for.cond2.loopexit.us
+ %cond.lcssa.ph.us.ph = phi i32 [ %cond.us, %for.cond2.loopexit.us ]
+ br label %for.cond2.for.inc13_crit_edge.us-lcssa.us
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us: ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us
+ %cond.lcssa.ph.us = phi i32 [ %cond.lcssa.ph.us.ph, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa ], [ %cond.lcssa.ph.us.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us.us-lcssa.us ]
+ br label %for.cond2.for.inc13_crit_edge
+
+for.body3.lr.ph.split: ; preds = %for.body3.lr.ph.for.body3.lr.ph.split_crit_edge
+ br i1 %tobool, label %for.body3.lr.ph.split.split.us, label %for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge
+
+for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge: ; preds = %for.body3.lr.ph.split
+ br label %for.body3.lr.ph.split.split
+
+for.body3.lr.ph.split.split.us: ; preds = %for.body3.lr.ph.split
+ br label %for.body3.us3
+
+for.body3.us3: ; preds = %for.cond2.loopexit.us11, %for.body3.lr.ph.split.split.us
+ br i1 false, label %cond.false.us4, label %cond.end.us5
+
+cond.false.us4: ; preds = %for.body3.us3
+ br label %cond.end.us5
+
+cond.end.us5: ; preds = %cond.false.us4, %for.body3.us3
+ %cond.us6 = phi i32 [ %div, %cond.false.us4 ], [ %conv7, %for.body3.us3 ]
+ %8 = load i32* @b, align 4
+ %cmp91.us7 = icmp slt i32 %8, 1
+ br i1 %cmp91.us7, label %for.inc.lr.ph.us12, label %for.cond2.loopexit.us11
+
+for.inc.us8: ; preds = %for.inc.lr.ph.us12, %for.inc.us8
+ %9 = phi i32 [ %8, %for.inc.lr.ph.us12 ], [ %inc.us9, %for.inc.us8 ]
+ %inc.us9 = add nsw i32 %9, 1
+ %cmp9.us10 = icmp slt i32 %inc.us9, 1
+ br i1 %cmp9.us10, label %for.inc.us8, label %for.cond8.for.cond2.loopexit_crit_edge.us13
+
+for.cond2.loopexit.us11: ; preds = %for.cond8.for.cond2.loopexit_crit_edge.us13, %cond.end.us5
+ br i1 true, label %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us, label %for.body3.us3
+
+for.inc.lr.ph.us12: ; preds = %cond.end.us5
+ br label %for.inc.us8
+
+for.cond8.for.cond2.loopexit_crit_edge.us13: ; preds = %for.inc.us8
+ %inc.lcssa.us14 = phi i32 [ %inc.us9, %for.inc.us8 ]
+ store i32 %inc.lcssa.us14, i32* @b, align 4
+ br label %for.cond2.loopexit.us11
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us: ; preds = %for.cond2.loopexit.us11
+ %cond.lcssa.ph.ph.us = phi i32 [ %cond.us6, %for.cond2.loopexit.us11 ]
+ br label %for.cond2.for.inc13_crit_edge.us-lcssa
+
+for.body3.lr.ph.split.split: ; preds = %for.body3.lr.ph.split.for.body3.lr.ph.split.split_crit_edge
+ br label %for.body3
+
+for.cond8.for.cond2.loopexit_crit_edge: ; preds = %for.inc
+ %inc.lcssa = phi i32 [ %inc, %for.inc ]
+ store i32 %inc.lcssa, i32* @b, align 4
+ br label %for.cond2.loopexit
+
+for.cond2.loopexit: ; preds = %cond.end, %for.cond8.for.cond2.loopexit_crit_edge
+ br i1 false, label %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa, label %for.body3
+
+for.body3: ; preds = %for.cond2.loopexit, %for.body3.lr.ph.split.split
+ br i1 false, label %cond.false, label %cond.end
+
+cond.false: ; preds = %for.body3
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %for.body3
+ %cond = phi i32 [ %div, %cond.false ], [ %conv7, %for.body3 ]
+ %10 = load i32* @b, align 4
+ %cmp91 = icmp slt i32 %10, 1
+ br i1 %cmp91, label %for.inc.lr.ph, label %for.cond2.loopexit
+
+for.inc.lr.ph: ; preds = %cond.end
+ br label %for.inc
+
+for.inc: ; preds = %for.inc, %for.inc.lr.ph
+ %11 = phi i32 [ %10, %for.inc.lr.ph ], [ %inc, %for.inc ]
+ %inc = add nsw i32 %11, 1
+ %cmp9 = icmp slt i32 %inc, 1
+ br i1 %cmp9, label %for.inc, label %for.cond8.for.cond2.loopexit_crit_edge
+
+for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa: ; preds = %for.cond2.loopexit
+ %cond.lcssa.ph.ph = phi i32 [ %cond, %for.cond2.loopexit ]
+ br label %for.cond2.for.inc13_crit_edge.us-lcssa
+
+for.cond2.for.inc13_crit_edge.us-lcssa: ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us
+ %cond.lcssa.ph = phi i32 [ %cond.lcssa.ph.ph, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa ], [ %cond.lcssa.ph.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us-lcssa.us ]
+ br label %for.cond2.for.inc13_crit_edge
+
+for.cond2.for.inc13_crit_edge: ; preds = %for.cond2.for.inc13_crit_edge.us-lcssa, %for.cond2.for.inc13_crit_edge.us-lcssa.us
+ %cond.lcssa = phi i32 [ %cond.lcssa.ph, %for.cond2.for.inc13_crit_edge.us-lcssa ], [ %cond.lcssa.ph.us, %for.cond2.for.inc13_crit_edge.us-lcssa.us ]
+ store i32 %cond.lcssa, i32* @c, align 4
+ br label %for.inc13
+
+; CHECK: [[for_inc13]]:
+; CHECK-NEXT: %[[indvars_iv_next]] = add nuw nsw i32 %[[indvars_iv]], 1
+; CHECK-NEXT: %[[exitcond4:.*]] = icmp ne i32 %[[indvars_iv]], -1
+; CHECK-NEXT: br i1 %[[exitcond4]], label %[[for_cond2_preheader]], label %[[for_end15:.*]]
+for.inc13: ; preds = %for.cond2.for.inc13_crit_edge, %for.cond2.preheader
+ %inc14 = add i8 %storemerge15, 1
+ %cmp = icmp ugt i8 %inc14, 50
+ br i1 %cmp, label %for.cond2.preheader, label %for.end15
+
+; CHECK: [[for_end15]]:
+; CHECK-NEXT: ret void
+for.end15: ; preds = %for.inc13
+ ret void
+}
diff --git a/test/Transforms/LICM/PR21582.ll b/test/Transforms/LICM/PR21582.ll
new file mode 100644
index 000000000000..c068c2f8e32b
--- /dev/null
+++ b/test/Transforms/LICM/PR21582.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+@b = external global i32, align 4
+@fn3.i = external global i32, align 4
+
+declare i32 @g() nounwind
+
+define i32 @f() {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.end, %entry
+; CHECK-LABEL: for.cond:
+; CHECK: store i32 0, i32* @b
+ store i32 0, i32* @b, align 4
+ br i1 true, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %for.cond
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %g.15 = phi i32 [ undef, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx2 = getelementptr inbounds i32* @fn3.i, i64 0
+ %0 = load i32* %arrayidx2, align 4
+ %call = call i32 @g()
+ br i1 false, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %for.cond
+ %whatever = phi i32 [ %call, %for.end.loopexit ], [ undef, %for.cond ]
+ br i1 false, label %for.cond, label %if.then
+
+if.then: ; preds = %for.end
+; CHECK-LABEL: if.then:
+; CHECK: phi i32 [ {{.*}}, %for.end ]
+; CHECK-NOT: store i32 0, i32* @b
+; CHECK: ret i32
+ ret i32 %whatever
+}
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
index 4244f157d9f8..69266693c479 100644
--- a/test/Transforms/LICM/speculate.ll
+++ b/test/Transforms/LICM/speculate.ll
@@ -3,12 +3,11 @@
; UDiv is safe to speculate if the denominator is known non-zero.
; CHECK-LABEL: @safe_udiv(
-; CHECK: %div = udiv i64 %x, %or
+; CHECK: %div = udiv i64 %x, 2
; CHECK-NEXT: br label %for.body
define void @safe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
entry:
- %or = or i64 %m, 1
br label %for.body
for.body: ; preds = %entry, %for.inc
@@ -19,7 +18,7 @@ for.body: ; preds = %entry, %for.inc
br i1 %tobool, label %for.inc, label %if.then
if.then: ; preds = %for.body
- %div = udiv i64 %x, %or
+ %div = udiv i64 %x, 2
%arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
store i64 %div, i64* %arrayidx1, align 8
br label %for.inc
@@ -69,13 +68,12 @@ for.end: ; preds = %for.inc, %entry
; known to have at least one zero bit.
; CHECK-LABEL: @safe_sdiv(
-; CHECK: %div = sdiv i64 %x, %or
+; CHECK: %div = sdiv i64 %x, 2
; CHECK-NEXT: br label %for.body
define void @safe_sdiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
entry:
%and = and i64 %m, -3
- %or = or i64 %and, 1
br label %for.body
for.body: ; preds = %entry, %for.inc
@@ -86,7 +84,7 @@ for.body: ; preds = %entry, %for.inc
br i1 %tobool, label %for.inc, label %if.then
if.then: ; preds = %for.body
- %div = sdiv i64 %x, %or
+ %div = sdiv i64 %x, 2
%arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
store i64 %div, i64* %arrayidx1, align 8
br label %for.inc
diff --git a/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/test/Transforms/LoopVectorize/incorrect-dom-info.ll
new file mode 100644
index 000000000000..624ee7e50597
--- /dev/null
+++ b/test/Transforms/LoopVectorize/incorrect-dom-info.ll
@@ -0,0 +1,142 @@
+; This test is based on one of benchmarks from SPEC2006. It exposes a bug with
+; incorrect updating of the dom-tree.
+; RUN: opt < %s -loop-vectorize -verify-dom-info
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@PL_utf8skip = external constant [0 x i8]
+
+; Function Attrs: nounwind ssp uwtable
+define void @Perl_pp_quotemeta() #0 {
+ %len = alloca i64, align 8
+ br i1 undef, label %2, label %1
+
+; <label>:1 ; preds = %0
+ br label %3
+
+; <label>:2 ; preds = %0
+ br label %3
+
+; <label>:3 ; preds = %2, %1
+ br i1 undef, label %34, label %4
+
+; <label>:4 ; preds = %3
+ br i1 undef, label %5, label %6
+
+; <label>:5 ; preds = %4
+ br label %6
+
+; <label>:6 ; preds = %5, %4
+ br i1 undef, label %7, label %8
+
+; <label>:7 ; preds = %6
+ br label %8
+
+; <label>:8 ; preds = %7, %6
+ br i1 undef, label %.preheader, label %9
+
+.preheader: ; preds = %9, %8
+ br i1 undef, label %.loopexit, label %.lr.ph
+
+; <label>:9 ; preds = %8
+ br i1 undef, label %thread-pre-split.preheader, label %.preheader
+
+thread-pre-split.preheader: ; preds = %9
+ br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+.thread-pre-split.loopexit_crit_edge: ; preds = %19
+ %scevgep.sum = xor i64 %umax, -1
+ %scevgep45 = getelementptr i8* %d.020, i64 %scevgep.sum
+ br label %thread-pre-split.loopexit
+
+thread-pre-split.loopexit: ; preds = %11, %.thread-pre-split.loopexit_crit_edge
+ %d.1.lcssa = phi i8* [ %scevgep45, %.thread-pre-split.loopexit_crit_edge ], [ %d.020, %11 ]
+ br i1 false, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+.lr.ph21: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader
+ %d.020 = phi i8* [ undef, %26 ], [ %d.1.lcssa, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
+ %10 = phi i64 [ %28, %26 ], [ undef, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ]
+ br i1 undef, label %11, label %22
+
+; <label>:11 ; preds = %.lr.ph21
+ %12 = getelementptr inbounds [0 x i8]* @PL_utf8skip, i64 0, i64 undef
+ %13 = load i8* %12, align 1
+ %14 = zext i8 %13 to i64
+ %15 = icmp ugt i64 %14, %10
+ %. = select i1 %15, i64 %10, i64 %14
+ br i1 undef, label %thread-pre-split.loopexit, label %.lr.ph28
+
+.lr.ph28: ; preds = %11
+ %16 = xor i64 %10, -1
+ %17 = xor i64 %14, -1
+ %18 = icmp ugt i64 %16, %17
+ %umax = select i1 %18, i64 %16, i64 %17
+ br label %19
+
+; <label>:19 ; preds = %19, %.lr.ph28
+ %ulen.126 = phi i64 [ %., %.lr.ph28 ], [ %20, %19 ]
+ %20 = add i64 %ulen.126, -1
+ %21 = icmp eq i64 %20, 0
+ br i1 %21, label %.thread-pre-split.loopexit_crit_edge, label %19
+
+; <label>:22 ; preds = %.lr.ph21
+ br i1 undef, label %26, label %23
+
+; <label>:23 ; preds = %22
+ br i1 undef, label %26, label %24
+
+; <label>:24 ; preds = %23
+ br i1 undef, label %26, label %25
+
+; <label>:25 ; preds = %24
+ br label %26
+
+; <label>:26 ; preds = %25, %24, %23, %22
+ %27 = load i64* %len, align 8
+ %28 = add i64 %27, -1
+ br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21
+
+thread-pre-split._crit_edge: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader
+ br label %.loopexit
+
+.lr.ph: ; preds = %33, %.preheader
+ br i1 undef, label %29, label %thread-pre-split5
+
+; <label>:29 ; preds = %.lr.ph
+ br i1 undef, label %33, label %30
+
+; <label>:30 ; preds = %29
+ br i1 undef, label %33, label %31
+
+thread-pre-split5: ; preds = %.lr.ph
+ br i1 undef, label %33, label %31
+
+; <label>:31 ; preds = %thread-pre-split5, %30
+ br i1 undef, label %33, label %32
+
+; <label>:32 ; preds = %31
+ br label %33
+
+; <label>:33 ; preds = %32, %31, %thread-pre-split5, %30, %29
+ br i1 undef, label %.loopexit, label %.lr.ph
+
+.loopexit: ; preds = %33, %thread-pre-split._crit_edge, %.preheader
+ br label %35
+
+; <label>:34 ; preds = %3
+ br label %35
+
+; <label>:35 ; preds = %34, %.loopexit
+ br i1 undef, label %37, label %36
+
+; <label>:36 ; preds = %35
+ br label %37
+
+; <label>:37 ; preds = %36, %35
+ ret void
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.6.0 "}
diff --git a/test/Transforms/LoopVectorize/loop-form.ll b/test/Transforms/LoopVectorize/loop-form.ll
new file mode 100644
index 000000000000..138df1d96138
--- /dev/null
+++ b/test/Transforms/LoopVectorize/loop-form.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that we vectorize only bottom-tested loops.
+; This is a reduced testcase from PR21302.
+;
+; rdar://problem/18886083
+
+%struct.X = type { i32, i16 }
+; CHECK-LABEL: @foo(
+; CHECK-NOT: vector.body
+
+define void @foo(i32 %n) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i, %n
+ br i1 %cmp, label %for.body, label %if.end
+
+for.body:
+ %iprom = sext i32 %i to i64
+ %b = getelementptr inbounds %struct.X* undef, i64 %iprom, i32 1
+ store i16 0, i16* %b, align 4
+ %inc = add nsw i32 %i, 1
+ br label %for.cond
+
+if.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
index 5bf7020a475a..848ffc428c5f 100644
--- a/test/Transforms/LoopVectorize/runtime-check-address-space.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
@@ -31,25 +31,23 @@ define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
%0 = load i32 addrspace(1)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -60,25 +58,23 @@ define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
%0 = load i32 addrspace(1)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
store i32 %mul, i32* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -89,25 +85,23 @@ define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
%0 = load i32* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1
store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -119,25 +113,23 @@ define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
-
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom
%0 = load i32 addrspace(1)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1
store i32 %mul, i32* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -149,25 +141,23 @@ define void @arst0(i32* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
-
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds i32* %b, i64 %idxprom
%0 = load i32* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -180,25 +170,23 @@ define void @arst1(i32* %b, i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
-
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
%0 = load i32 addrspace(1)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1
store i32 %mul, i32* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
@@ -210,25 +198,23 @@ define void @aoeu(i32 %n) #0 {
; CHECK: ret
entry:
- br label %for.cond
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body, label %for.end
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp slt i32 %i.0, %n
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %idxprom = sext i32 %i.0 to i64
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %idxprom = sext i32 %i.02 to i64
%arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
%0 = load i32 addrspace(2)* %arrayidx, align 4
%mul = mul nsw i32 %0, 3
- %idxprom1 = sext i32 %i.0 to i64
+ %idxprom1 = sext i32 %i.02 to i64
%arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add nsw i32 %i.0, 1
- br label %for.cond
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body, %entry
ret void
}
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
index 212b37cceab3..4e98ec504f5b 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
@@ -8,26 +8,24 @@ define void @add_ints_1_1_1(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addr
; CHECK-LABEL: @add_ints_1_1_1(
; CHECK: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
%0 = load i32 addrspace(1)* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01
%1 = load i32 addrspace(1)* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01
store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
@@ -35,26 +33,24 @@ define void @add_ints_as_1_0_0(i32 addrspace(1)* %a, i32* %b, i32* %c) #0 {
; CHECK-LABEL: @add_ints_as_1_0_0(
; CHECK-NOT: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %i.01
%0 = load i32* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01
%1 = load i32* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01
store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
@@ -62,26 +58,24 @@ define void @add_ints_as_0_1_0(i32* %a, i32 addrspace(1)* %b, i32* %c) #0 {
; CHECK-LABEL: @add_ints_as_0_1_0(
; CHECK-NOT: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
%0 = load i32 addrspace(1)* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01
%1 = load i32* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
store i32 %add, i32* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
@@ -89,26 +83,24 @@ define void @add_ints_as_0_1_1(i32* %a, i32 addrspace(1)* %b, i32 addrspace(1)*
; CHECK-LABEL: @add_ints_as_0_1_1(
; CHECK-NOT: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
%0 = load i32 addrspace(1)* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01
%1 = load i32 addrspace(1)* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
store i32 %add, i32* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
@@ -116,26 +108,24 @@ define void @add_ints_as_0_1_2(i32* %a, i32 addrspace(1)* %b, i32 addrspace(2)*
; CHECK-LABEL: @add_ints_as_0_1_2(
; CHECK-NOT: <4 x i32>
; CHECK: ret
-entry:
- br label %for.cond
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %cmp = icmp ult i64 %i.0, 200
- br i1 %cmp, label %for.body, label %for.end
+entry:
+ br label %for.body
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01
%0 = load i32 addrspace(1)* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.0
+ %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.01
%1 = load i32 addrspace(2)* %arrayidx1, align 4
%add = add nsw i32 %0, %1
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01
store i32 %add, i32* %arrayidx2, align 4
- %inc = add i64 %i.0, 1
- br label %for.cond
+ %inc = add i64 %i.01, 1
+ %cmp = icmp ult i64 %inc, 200
+ br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.cond
+for.end: ; preds = %for.body
ret void
}
diff --git a/test/Transforms/LoopVectorize/unsized-pointee-crash.ll b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll
new file mode 100644
index 000000000000..5cc98378b952
--- /dev/null
+++ b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @fn1
+define void @fn1() {
+entry:
+ br label %for.body
+
+for.body:
+ %b.05 = phi i32 (...)* [ undef, %entry ], [ %1, %for.body ]
+ %a.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = bitcast i32 (...)* %b.05 to i8*
+ %add.ptr = getelementptr i8* %0, i64 1
+ %1 = bitcast i8* %add.ptr to i32 (...)*
+; CHECK: %[[cst:.*]] = bitcast i32 (...)* {{.*}} to i8*
+; CHECK-NEXT: %[[gep:.*]] = getelementptr i8* %[[cst]], i64 1
+ %inc = add nsw i32 %a.04, 1
+ %exitcond = icmp eq i32 %a.04, 63
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/vect.stats.ll b/test/Transforms/LoopVectorize/vect.stats.ll
index 92ec24f726ee..9adf2bb6cf14 100644
--- a/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/test/Transforms/LoopVectorize/vect.stats.ll
@@ -13,53 +13,47 @@ target triple = "x86_64-unknown-linux-gnu"
define void @vectorized(float* nocapture %a, i64 %size) {
entry:
- %cmp1 = icmp sgt i64 %size, 0
- br i1 %cmp1, label %for.header, label %for.end
-
-for.header:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %cmp2 = icmp sgt i64 %indvars.iv, %size
- br i1 %cmp2, label %for.end, label %for.body
-
-for.body:
-
- %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+ %cmp1 = icmp sle i64 %size, 0
+ %cmp21 = icmp sgt i64 0, %size
+ %or.cond = or i1 %cmp1, %cmp21
+ br i1 %or.cond, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv2
%0 = load float* %arrayidx, align 4
%mul = fmul float %0, %0
store float %mul, float* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
+ %cmp2 = icmp sgt i64 %indvars.iv.next, %size
+ br i1 %cmp2, label %for.end, label %for.body
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- br label %for.header
-
-for.end:
+for.end: ; preds = %entry, %for.body
ret void
}
define void @not_vectorized(float* nocapture %a, i64 %size) {
entry:
- %cmp1 = icmp sgt i64 %size, 0
- br i1 %cmp1, label %for.header, label %for.end
-
-for.header:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %cmp2 = icmp sgt i64 %indvars.iv, %size
- br i1 %cmp2, label %for.end, label %for.body
-
-for.body:
-
- %0 = add nsw i64 %indvars.iv, -5
+ %cmp1 = icmp sle i64 %size, 0
+ %cmp21 = icmp sgt i64 0, %size
+ %or.cond = or i1 %cmp1, %cmp21
+ br i1 %or.cond, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %0 = add nsw i64 %indvars.iv2, -5
%arrayidx = getelementptr inbounds float* %a, i64 %0
%1 = load float* %arrayidx, align 4
- %2 = add nsw i64 %indvars.iv, 2
+ %2 = add nsw i64 %indvars.iv2, 2
%arrayidx2 = getelementptr inbounds float* %a, i64 %2
%3 = load float* %arrayidx2, align 4
%mul = fmul float %1, %3
- %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv2
store float %mul, float* %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
+ %cmp2 = icmp sgt i64 %indvars.iv.next, %size
+ br i1 %cmp2, label %for.end, label %for.body
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- br label %for.header
-
-for.end:
+for.end: ; preds = %entry, %for.body
ret void
} \ No newline at end of file
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 8d82964dcbd1..946bc40b0587 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -501,3 +501,37 @@ end:
; CHECK-NOT: load
; CHECK: ret float %[[phi]]
}
+
+%struct.S = type { i32 }
+
+; Verifies we fixed PR20822. We have a foldable PHI feeding a speculatable PHI
+; which requires the rewriting of the speculated PHI to handle insertion
+; when the incoming pointer is itself from a PHI node. We would previously
+; insert a bitcast instruction *before* a PHI, producing an invalid module;
+; make sure we insert *after* the first non-PHI instruction.
+define void @PR20822() {
+; CHECK-LABEL: @PR20822(
+entry:
+ %f = alloca %struct.S, align 4
+; CHECK: %[[alloca:.*]] = alloca
+; CHECK: %[[cast:.*]] = bitcast i32* %[[alloca]] to %struct.S*
+ br i1 undef, label %if.end, label %for.cond
+
+for.cond: ; preds = %for.cond, %entry
+ br label %if.end
+
+if.end: ; preds = %for.cond, %entry
+ %f2 = phi %struct.S* [ %f, %entry ], [ %f, %for.cond ]
+; CHECK: phi {{.*}} %[[cast]]
+; CHECK: phi i32
+ phi i32 [ undef, %entry ], [ undef, %for.cond ]
+ br i1 undef, label %if.then5, label %if.then2
+
+if.then2: ; preds = %if.end
+ br label %if.then5
+
+if.then5: ; preds = %if.then2, %if.end
+ %f1 = phi %struct.S* [ undef, %if.then2 ], [ %f2, %if.end ]
+ store %struct.S undef, %struct.S* %f1, align 4
+ ret void
+}
diff --git a/unittests/Analysis/LazyCallGraphTest.cpp b/unittests/Analysis/LazyCallGraphTest.cpp
index d7c70453c9b0..7eb87dc02d64 100644
--- a/unittests/Analysis/LazyCallGraphTest.cpp
+++ b/unittests/Analysis/LazyCallGraphTest.cpp
@@ -39,21 +39,23 @@ std::unique_ptr<Module> parseAssembly(const char *Assembly) {
return M;
}
-// IR forming a call graph with a diamond of triangle-shaped SCCs:
-//
-// d1
-// / \
-// d3--d2
-// / \
-// b1 c1
-// / \ / \
-// b3--b2 c3--c2
-// \ /
-// a1
-// / \
-// a3--a2
-//
-// All call edges go up between SCCs, and clockwise around the SCC.
+/*
+ IR forming a call graph with a diamond of triangle-shaped SCCs:
+
+ d1
+ / \
+ d3--d2
+ / \
+ b1 c1
+ / \ / \
+ b3--b2 c3--c2
+ \ /
+ a1
+ / \
+ a3--a2
+
+ All call edges go up between SCCs, and clockwise around the SCC.
+ */
static const char DiamondOfTriangles[] =
"define void @a1() {\n"
"entry:\n"
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index 6d43e8ed3877..ec2251d934c0 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -35,23 +35,26 @@ private:
} // End anonymous namespace
void CallingConvEmitter::run(raw_ostream &O) {
-
std::vector<Record*> CCs = Records.getAllDerivedDefinitions("CallingConv");
-
- // Emit prototypes for all of the CC's so that they can forward ref each
- // other.
+
+ // Emit prototypes for all of the non-custom CC's so that they can forward ref
+ // each other.
+ for (unsigned i = 0, e = CCs.size(); i != e; ++i) {
+ if (!CCs[i]->getValueAsBit("Custom")) {
+ O << "static bool " << CCs[i]->getName()
+ << "(unsigned ValNo, MVT ValVT,\n"
+ << std::string(CCs[i]->getName().size() + 13, ' ')
+ << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+ << std::string(CCs[i]->getName().size() + 13, ' ')
+ << "ISD::ArgFlagsTy ArgFlags, CCState &State);\n";
+ }
+ }
+
+ // Emit each non-custom calling convention description in full.
for (unsigned i = 0, e = CCs.size(); i != e; ++i) {
- O << "static bool " << CCs[i]->getName()
- << "(unsigned ValNo, MVT ValVT,\n"
- << std::string(CCs[i]->getName().size()+13, ' ')
- << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
- << std::string(CCs[i]->getName().size()+13, ' ')
- << "ISD::ArgFlagsTy ArgFlags, CCState &State);\n";
+ if (!CCs[i]->getValueAsBit("Custom"))
+ EmitCallingConv(CCs[i], O);
}
-
- // Emit each calling convention description in full.
- for (unsigned i = 0, e = CCs.size(); i != e; ++i)
- EmitCallingConv(CCs[i], O);
}
@@ -227,6 +230,21 @@ void CallingConvEmitter::EmitAction(Record *Action,
<< IndentStr << "else\n"
<< IndentStr << IndentStr << "LocInfo = CCValAssign::AExt;\n";
}
+ } else if (Action->isSubClassOf("CCPromoteToUpperBitsInType")) {
+ Record *DestTy = Action->getValueAsDef("DestTy");
+ MVT::SimpleValueType DestVT = getValueType(DestTy);
+ O << IndentStr << "LocVT = " << getEnumName(DestVT) << ";\n";
+ if (MVT(DestVT).isFloatingPoint()) {
+ PrintFatalError("CCPromoteToUpperBitsInType does not handle floating "
+ "point");
+ } else {
+ O << IndentStr << "if (ArgFlags.isSExt())\n"
+ << IndentStr << IndentStr << "LocInfo = CCValAssign::SExtUpper;\n"
+ << IndentStr << "else if (ArgFlags.isZExt())\n"
+ << IndentStr << IndentStr << "LocInfo = CCValAssign::ZExtUpper;\n"
+ << IndentStr << "else\n"
+ << IndentStr << IndentStr << "LocInfo = CCValAssign::AExtUpper;\n";
+ }
} else if (Action->isSubClassOf("CCBitConvertToType")) {
Record *DestTy = Action->getValueAsDef("DestTy");
O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";
diff --git a/utils/lit/lit/Test.py b/utils/lit/lit/Test.py
index e51bf1297762..2e0f478337bc 100644
--- a/utils/lit/lit/Test.py
+++ b/utils/lit/lit/Test.py
@@ -1,4 +1,5 @@
import os
+from xml.sax.saxutils import escape
# Test result codes.
@@ -194,3 +195,17 @@ class Test:
return True
return False
+
+
+ def getJUnitXML(self):
+ test_name = self.path_in_suite[-1]
+ test_path = self.path_in_suite[:-1]
+
+ xml = "<testcase classname='" + self.suite.name + "." + "/".join(test_path) + "'" + " name='" + test_name + "'"
+ xml += " time='%.2f'" % (self.result.elapsed,)
+ if self.result.code.isFailure:
+ xml += ">\n\t<failure >\n" + escape(self.result.output)
+ xml += "\n\t</failure>\n</testcase>"
+ else:
+ xml += "/>"
+ return xml \ No newline at end of file
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index 1f62e3563e80..6758b6037c04 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -194,6 +194,9 @@ def main(builtinParameters = {}):
group.add_option("", "--no-execute", dest="noExecute",
help="Don't execute any tests (assume PASS)",
action="store_true", default=False)
+ group.add_option("", "--xunit-xml-output", dest="xunit_output_file",
+ help=("Write XUnit-compatible XML test reports to the"
+ " specified file"), default=None)
parser.add_option_group(group)
group = OptionGroup(parser, "Test Selection")
@@ -285,10 +288,10 @@ def main(builtinParameters = {}):
if opts.showSuites or opts.showTests:
# Aggregate the tests by suite.
suitesAndTests = {}
- for t in run.tests:
- if t.suite not in suitesAndTests:
- suitesAndTests[t.suite] = []
- suitesAndTests[t.suite].append(t)
+ for result_test in run.tests:
+ if result_test.suite not in suitesAndTests:
+ suitesAndTests[result_test.suite] = []
+ suitesAndTests[result_test.suite].append(result_test)
suitesAndTests = list(suitesAndTests.items())
suitesAndTests.sort(key = lambda item: item[0].name)
@@ -321,8 +324,8 @@ def main(builtinParameters = {}):
except:
parser.error("invalid regular expression for --filter: %r" % (
opts.filter))
- run.tests = [t for t in run.tests
- if rex.search(t.getFullName())]
+ run.tests = [result_test for result_test in run.tests
+ if rex.search(result_test.getFullName())]
# Then select the order.
if opts.shuffle:
@@ -330,7 +333,7 @@ def main(builtinParameters = {}):
elif opts.incremental:
sort_by_incremental_cache(run)
else:
- run.tests.sort(key = lambda t: t.getFullName())
+ run.tests.sort(key = lambda result_test: result_test.getFullName())
# Finally limit the number of tests, if desired.
if opts.maxTests is not None:
@@ -415,6 +418,36 @@ def main(builtinParameters = {}):
if N:
print(' %s: %d' % (name,N))
+ if opts.xunit_output_file:
+ # Collect the tests, indexed by test suite
+ by_suite = {}
+ for result_test in run.tests:
+ suite = result_test.suite.config.name
+ if suite not in by_suite:
+ by_suite[suite] = {
+ 'passes' : 0,
+ 'failures' : 0,
+ 'tests' : [] }
+ by_suite[suite]['tests'].append(result_test)
+ if result_test.result.code.isFailure:
+ by_suite[suite]['failures'] += 1
+ else:
+ by_suite[suite]['passes'] += 1
+ xunit_output_file = open(opts.xunit_output_file, "w")
+ xunit_output_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n")
+ xunit_output_file.write("<testsuites>\n")
+ for suite_name, suite in by_suite.items():
+ xunit_output_file.write("<testsuite name='" + suite_name + "'")
+ xunit_output_file.write(" tests='" + str(suite['passes'] +
+ suite['failures']) + "'")
+ xunit_output_file.write(" failures='" + str(suite['failures']) +
+ "'>\n")
+ for result_test in suite['tests']:
+ xunit_output_file.write(result_test.getJUnitXML() + "\n")
+ xunit_output_file.write("</testsuite>\n")
+ xunit_output_file.write("</testsuites>")
+ xunit_output_file.close()
+
# If we encountered any additional errors, exit abnormally.
if litConfig.numErrors:
sys.stderr.write('\n%d error(s), exiting.\n' % litConfig.numErrors)